1 /* motion.c, motion estimation */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
34 /* private prototypes */
36 static void frame_ME _ANSI_ARGS_((unsigned char *oldorg, unsigned char *neworg,
37 unsigned char *oldref, unsigned char *newref, unsigned char *cur,
38 int i, int j, int sxf, int syf, int sxb, int syb, struct mbinfo *mbi));
40 static void field_ME _ANSI_ARGS_((unsigned char *oldorg, unsigned char *neworg,
41 unsigned char *oldref, unsigned char *newref, unsigned char *cur,
42 unsigned char *curref, int i, int j, int sxf, int syf, int sxb, int syb,
43 struct mbinfo *mbi, int secondfield, int ipflag));
45 static void frame_estimate _ANSI_ARGS_((unsigned char *org,
46 unsigned char *ref, unsigned char *mb,
48 int sx, int sy, int *iminp, int *jminp, int *imintp, int *jmintp,
49 int *iminbp, int *jminbp, int *dframep, int *dfieldp,
50 int *tselp, int *bselp, int imins[2][2], int jmins[2][2]));
52 static void field_estimate _ANSI_ARGS_((unsigned char *toporg,
53 unsigned char *topref, unsigned char *botorg, unsigned char *botref,
54 unsigned char *mb, int i, int j, int sx, int sy, int ipflag,
55 int *iminp, int *jminp, int *imin8up, int *jmin8up, int *imin8lp,
56 int *jmin8lp, int *dfieldp, int *d8p, int *selp, int *sel8up, int *sel8lp,
57 int *iminsp, int *jminsp, int *dsp));
59 static void dpframe_estimate _ANSI_ARGS_((unsigned char *ref,
60 unsigned char *mb, int i, int j, int iminf[2][2], int jminf[2][2],
61 int *iminp, int *jminp, int *imindmvp, int *jmindmvp,
62 int *dmcp, int *vmcp));
64 static void dpfield_estimate _ANSI_ARGS_((unsigned char *topref,
65 unsigned char *botref, unsigned char *mb,
66 int i, int j, int imins, int jmins, int *imindmvp, int *jmindmvp,
67 int *dmcp, int *vmcp));
69 static int fullsearch _ANSI_ARGS_((unsigned char *org, unsigned char *ref,
71 int lx, int i0, int j0, int sx, int sy, int h, int xmax, int ymax,
72 int *iminp, int *jminp));
74 static int dist1 _ANSI_ARGS_((unsigned char *blk1, unsigned char *blk2,
75 int lx, int hx, int hy, int h, int distlim));
77 static int dist2 _ANSI_ARGS_((unsigned char *blk1, unsigned char *blk2,
78 int lx, int hx, int hy, int h));
80 static int bdist1 _ANSI_ARGS_((unsigned char *pf, unsigned char *pb,
81 unsigned char *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h));
83 static int bdist2 _ANSI_ARGS_((unsigned char *pf, unsigned char *pb,
84 unsigned char *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h));
86 static int variance _ANSI_ARGS_((unsigned char *p, int lx));
89 * motion estimation for progressive and interlaced frame pictures
91 * oldorg: source frame for forward prediction (used for P and B frames)
92 * neworg: source frame for backward prediction (B frames only)
93 * oldref: reconstructed frame for forward prediction (P and B frames)
94 * newref: reconstructed frame for backward prediction (B frames only)
95 * cur: current frame (the one for which the prediction is formed)
96 * sxf,syf: forward search window (frame coordinates)
97 * sxb,syb: backward search window (frame coordinates)
98 * mbi: pointer to macroblock info structure
102 * mb_type: 0, MB_INTRA, MB_FORWARD, MB_BACKWARD, MB_FORWARD|MB_BACKWARD
103 * MV[][][]: motion vectors (frame format)
104 * mv_field_sel: top/bottom field (for field prediction)
105 * motion_type: MC_FRAME, MC_FIELD
107 * uses global vars: pict_type, frame_pred_dct
109 void motion_estimation(oldorg,neworg,oldref,newref,cur,curref,
110 sxf,syf,sxb,syb,mbi,secondfield,ipflag)
111 unsigned char *oldorg,*neworg,*oldref,*newref,*cur,*curref;
114 int secondfield,ipflag;
118 /* loop through all macroblocks of the picture */
119 for (j=0; j<height2; j+=16)
121 for (i=0; i<width; i+=16)
123 if (pict_struct==FRAME_PICTURE)
124 frame_ME(oldorg,neworg,oldref,newref,cur,i,j,sxf,syf,sxb,syb,mbi);
126 field_ME(oldorg,neworg,oldref,newref,cur,curref,i,j,sxf,syf,sxb,syb,
127 mbi,secondfield,ipflag);
138 // putc('\r',stderr);
141 static void frame_ME(oldorg,neworg,oldref,newref,cur,i,j,sxf,syf,sxb,syb,mbi)
142 unsigned char *oldorg,*neworg,*oldref,*newref,*cur;
143 int i,j,sxf,syf,sxb,syb;
146 int imin,jmin,iminf,jminf,iminr,jminr;
147 int imint,jmint,iminb,jminb;
148 int imintf,jmintf,iminbf,jminbf;
149 int imintr,jmintr,iminbr,jminbr;
151 int dmc,dmcf,dmcr,dmci,vmc,vmcf,vmcr,vmci;
152 int dmcfield,dmcfieldf,dmcfieldr,dmcfieldi;
153 int tsel,bsel,tself,bself,tselr,bselr;
155 int imins[2][2],jmins[2][2];
156 int imindp,jmindp,imindmv,jmindmv,dmc_dp,vmc_dp;
158 mb = cur + i + width*j;
160 var = variance(mb,width);
162 if (pict_type==I_TYPE)
163 mbi->mb_type = MB_INTRA;
164 else if (pict_type==P_TYPE)
168 dmc = fullsearch(oldorg,oldref,mb,
169 width,i,j,sxf,syf,16,width,height,&imin,&jmin);
170 vmc = dist2(oldref+(imin>>1)+width*(jmin>>1),mb,
171 width,imin&1,jmin&1,16);
172 mbi->motion_type = MC_FRAME;
176 frame_estimate(oldorg,oldref,mb,i,j,sxf,syf,
177 &imin,&jmin,&imint,&jmint,&iminb,&jminb,
178 &dmc,&dmcfield,&tsel,&bsel,imins,jmins);
181 dpframe_estimate(oldref,mb,i,j>>1,imins,jmins,
182 &imindp,&jmindp,&imindmv,&jmindmv,&dmc_dp,&vmc_dp);
184 /* select between dual prime, frame and field prediction */
185 if (M==1 && dmc_dp<dmc && dmc_dp<dmcfield)
187 mbi->motion_type = MC_DMV;
191 else if (dmc<=dmcfield)
193 mbi->motion_type = MC_FRAME;
194 vmc = dist2(oldref+(imin>>1)+width*(jmin>>1),mb,
195 width,imin&1,jmin&1,16);
199 mbi->motion_type = MC_FIELD;
201 vmc = dist2(oldref+(tsel?width:0)+(imint>>1)+(width<<1)*(jmint>>1),
202 mb,width<<1,imint&1,jmint&1,8);
203 vmc+= dist2(oldref+(bsel?width:0)+(iminb>>1)+(width<<1)*(jminb>>1),
204 mb+width,width<<1,iminb&1,jminb&1,8);
208 /* select between intra or non-intra coding:
210 * selection is based on intra block variance (var) vs.
211 * prediction error variance (vmc)
213 * blocks with small prediction error are always coded non-intra
214 * even if variance is smaller (is this reasonable?)
216 if (vmc>var && vmc>=9*256)
217 mbi->mb_type = MB_INTRA;
220 /* select between MC / No-MC
222 * use No-MC if var(No-MC) <= 1.25*var(MC)
223 * (i.e slightly biased towards No-MC)
225 * blocks with small prediction error are always coded as No-MC
226 * (requires no motion vectors, allows skipping)
228 v0 = dist2(oldref+i+width*j,mb,width,0,0,16);
229 if (4*v0>5*vmc && v0>=9*256)
233 mbi->mb_type = MB_FORWARD;
234 if (mbi->motion_type==MC_FRAME)
236 mbi->MV[0][0][0] = imin - (i<<1);
237 mbi->MV[0][0][1] = jmin - (j<<1);
239 else if (mbi->motion_type==MC_DMV)
241 /* these are FRAME vectors */
242 /* same parity vector */
243 mbi->MV[0][0][0] = imindp - (i<<1);
244 mbi->MV[0][0][1] = (jmindp<<1) - (j<<1);
246 /* opposite parity vector */
247 mbi->dmvector[0] = imindmv;
248 mbi->dmvector[1] = jmindmv;
252 /* these are FRAME vectors */
253 mbi->MV[0][0][0] = imint - (i<<1);
254 mbi->MV[0][0][1] = (jmint<<1) - (j<<1);
255 mbi->MV[1][0][0] = iminb - (i<<1);
256 mbi->MV[1][0][1] = (jminb<<1) - (j<<1);
257 mbi->mv_field_sel[0][0] = tsel;
258 mbi->mv_field_sel[1][0] = bsel;
266 mbi->motion_type = MC_FRAME;
267 mbi->MV[0][0][0] = 0;
268 mbi->MV[0][0][1] = 0;
272 else /* if (pict_type==B_TYPE) */
277 dmcf = fullsearch(oldorg,oldref,mb,
278 width,i,j,sxf,syf,16,width,height,&iminf,&jminf);
279 vmcf = dist2(oldref+(iminf>>1)+width*(jminf>>1),mb,
280 width,iminf&1,jminf&1,16);
283 dmcr = fullsearch(neworg,newref,mb,
284 width,i,j,sxb,syb,16,width,height,&iminr,&jminr);
285 vmcr = dist2(newref+(iminr>>1)+width*(jminr>>1),mb,
286 width,iminr&1,jminr&1,16);
288 /* interpolated (bidirectional) */
289 vmci = bdist2(oldref+(iminf>>1)+width*(jminf>>1),
290 newref+(iminr>>1)+width*(jminr>>1),
291 mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);
295 /* select between forward/backward/interpolated prediction:
296 * use the one with smallest mean sqaured prediction error
298 if (vmcf<=vmcr && vmcf<=vmci)
301 mbi->mb_type = MB_FORWARD;
306 mbi->mb_type = MB_BACKWARD;
311 mbi->mb_type = MB_FORWARD|MB_BACKWARD;
314 mbi->motion_type = MC_FRAME;
318 /* forward prediction */
319 frame_estimate(oldorg,oldref,mb,i,j,sxf,syf,
320 &iminf,&jminf,&imintf,&jmintf,&iminbf,&jminbf,
321 &dmcf,&dmcfieldf,&tself,&bself,imins,jmins);
323 /* backward prediction */
324 frame_estimate(neworg,newref,mb,i,j,sxb,syb,
325 &iminr,&jminr,&imintr,&jmintr,&iminbr,&jminbr,
326 &dmcr,&dmcfieldr,&tselr,&bselr,imins,jmins);
328 /* calculate interpolated distance */
330 dmci = bdist1(oldref+(iminf>>1)+width*(jminf>>1),
331 newref+(iminr>>1)+width*(jminr>>1),
332 mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);
336 oldref+(imintf>>1)+(tself?width:0)+(width<<1)*(jmintf>>1),
337 newref+(imintr>>1)+(tselr?width:0)+(width<<1)*(jmintr>>1),
338 mb,width<<1,imintf&1,jmintf&1,imintr&1,jmintr&1,8);
342 oldref+(iminbf>>1)+(bself?width:0)+(width<<1)*(jminbf>>1),
343 newref+(iminbr>>1)+(bselr?width:0)+(width<<1)*(jminbr>>1),
344 mb+width,width<<1,iminbf&1,jminbf&1,iminbr&1,jminbr&1,8);
346 /* select prediction type of minimum distance from the
347 * six candidates (field/frame * forward/backward/interpolated)
349 if (dmci<dmcfieldi && dmci<dmcf && dmci<dmcfieldf
350 && dmci<dmcr && dmci<dmcfieldr)
352 /* frame, interpolated */
353 mbi->mb_type = MB_FORWARD|MB_BACKWARD;
354 mbi->motion_type = MC_FRAME;
355 vmc = bdist2(oldref+(iminf>>1)+width*(jminf>>1),
356 newref+(iminr>>1)+width*(jminr>>1),
357 mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);
359 else if (dmcfieldi<dmcf && dmcfieldi<dmcfieldf
360 && dmcfieldi<dmcr && dmcfieldi<dmcfieldr)
362 /* field, interpolated */
363 mbi->mb_type = MB_FORWARD|MB_BACKWARD;
364 mbi->motion_type = MC_FIELD;
365 vmc = bdist2(oldref+(imintf>>1)+(tself?width:0)+(width<<1)*(jmintf>>1),
366 newref+(imintr>>1)+(tselr?width:0)+(width<<1)*(jmintr>>1),
367 mb,width<<1,imintf&1,jmintf&1,imintr&1,jmintr&1,8);
368 vmc+= bdist2(oldref+(iminbf>>1)+(bself?width:0)+(width<<1)*(jminbf>>1),
369 newref+(iminbr>>1)+(bselr?width:0)+(width<<1)*(jminbr>>1),
370 mb+width,width<<1,iminbf&1,jminbf&1,iminbr&1,jminbr&1,8);
372 else if (dmcf<dmcfieldf && dmcf<dmcr && dmcf<dmcfieldr)
375 mbi->mb_type = MB_FORWARD;
376 mbi->motion_type = MC_FRAME;
377 vmc = dist2(oldref+(iminf>>1)+width*(jminf>>1),mb,
378 width,iminf&1,jminf&1,16);
380 else if (dmcfieldf<dmcr && dmcfieldf<dmcfieldr)
383 mbi->mb_type = MB_FORWARD;
384 mbi->motion_type = MC_FIELD;
385 vmc = dist2(oldref+(tself?width:0)+(imintf>>1)+(width<<1)*(jmintf>>1),
386 mb,width<<1,imintf&1,jmintf&1,8);
387 vmc+= dist2(oldref+(bself?width:0)+(iminbf>>1)+(width<<1)*(jminbf>>1),
388 mb+width,width<<1,iminbf&1,jminbf&1,8);
390 else if (dmcr<dmcfieldr)
392 /* frame, backward */
393 mbi->mb_type = MB_BACKWARD;
394 mbi->motion_type = MC_FRAME;
395 vmc = dist2(newref+(iminr>>1)+width*(jminr>>1),mb,
396 width,iminr&1,jminr&1,16);
400 /* field, backward */
401 mbi->mb_type = MB_BACKWARD;
402 mbi->motion_type = MC_FIELD;
403 vmc = dist2(newref+(tselr?width:0)+(imintr>>1)+(width<<1)*(jmintr>>1),
404 mb,width<<1,imintr&1,jmintr&1,8);
405 vmc+= dist2(newref+(bselr?width:0)+(iminbr>>1)+(width<<1)*(jminbr>>1),
406 mb+width,width<<1,iminbr&1,jminbr&1,8);
410 /* select between intra or non-intra coding:
412 * selection is based on intra block variance (var) vs.
413 * prediction error variance (vmc)
415 * blocks with small prediction error are always coded non-intra
416 * even if variance is smaller (is this reasonable?)
418 if (vmc>var && vmc>=9*256)
419 mbi->mb_type = MB_INTRA;
423 if (mbi->motion_type==MC_FRAME)
426 mbi->MV[0][0][0] = iminf - (i<<1);
427 mbi->MV[0][0][1] = jminf - (j<<1);
429 mbi->MV[0][1][0] = iminr - (i<<1);
430 mbi->MV[0][1][1] = jminr - (j<<1);
434 /* these are FRAME vectors */
436 mbi->MV[0][0][0] = imintf - (i<<1);
437 mbi->MV[0][0][1] = (jmintf<<1) - (j<<1);
438 mbi->MV[1][0][0] = iminbf - (i<<1);
439 mbi->MV[1][0][1] = (jminbf<<1) - (j<<1);
440 mbi->mv_field_sel[0][0] = tself;
441 mbi->mv_field_sel[1][0] = bself;
443 mbi->MV[0][1][0] = imintr - (i<<1);
444 mbi->MV[0][1][1] = (jmintr<<1) - (j<<1);
445 mbi->MV[1][1][0] = iminbr - (i<<1);
446 mbi->MV[1][1][1] = (jminbr<<1) - (j<<1);
447 mbi->mv_field_sel[0][1] = tselr;
448 mbi->mv_field_sel[1][1] = bselr;
457 * motion estimation for field pictures
459 * oldorg: original frame for forward prediction (P and B frames)
460 * neworg: original frame for backward prediction (B frames only)
461 * oldref: reconstructed frame for forward prediction (P and B frames)
462 * newref: reconstructed frame for backward prediction (B frames only)
463 * cur: current original frame (the one for which the prediction is formed)
464 * curref: current reconstructed frame (to predict second field from first)
465 * sxf,syf: forward search window (frame coordinates)
466 * sxb,syb: backward search window (frame coordinates)
467 * mbi: pointer to macroblock info structure
468 * secondfield: indicates second field of a frame (in P fields this means
469 * that reference field of opposite parity is in curref instead
471 * ipflag: indicates a P type field which is the second field of a frame
472 * in which the first field is I type (this restricts predictions
473 * to be based only on the opposite parity (=I) field)
477 * mb_type: 0, MB_INTRA, MB_FORWARD, MB_BACKWARD, MB_FORWARD|MB_BACKWARD
478 * MV[][][]: motion vectors (field format)
479 * mv_field_sel: top/bottom field
480 * motion_type: MC_FIELD, MC_16X8
482 * uses global vars: pict_type, pict_struct
484 static void field_ME(oldorg,neworg,oldref,newref,cur,curref,i,j,
485 sxf,syf,sxb,syb,mbi,secondfield,ipflag)
486 unsigned char *oldorg,*neworg,*oldref,*newref,*cur,*curref;
487 int i,j,sxf,syf,sxb,syb;
489 int secondfield,ipflag;
492 unsigned char *mb, *toporg, *topref, *botorg, *botref;
493 int var,vmc,v0,dmc,dmcfieldi,dmc8i;
494 int imin,jmin,imin8u,jmin8u,imin8l,jmin8l,dmcfield,dmc8,sel,sel8u,sel8l;
495 int iminf,jminf,imin8uf,jmin8uf,imin8lf,jmin8lf,dmcfieldf,dmc8f,self,sel8uf,sel8lf;
496 int iminr,jminr,imin8ur,jmin8ur,imin8lr,jmin8lr,dmcfieldr,dmc8r,selr,sel8ur,sel8lr;
497 int imins,jmins,ds,imindmv,jmindmv,vmc_dp,dmc_dp;
502 if (pict_struct==BOTTOM_FIELD)
505 var = variance(mb,w2);
507 if (pict_type==I_TYPE)
508 mbi->mb_type = MB_INTRA;
509 else if (pict_type==P_TYPE)
513 botorg = oldorg + width;
514 botref = oldref + width;
518 /* opposite parity field is in same frame */
519 if (pict_struct==TOP_FIELD)
521 /* current is top field */
522 botorg = cur + width;
523 botref = curref + width;
527 /* current is bottom field */
533 field_estimate(toporg,topref,botorg,botref,mb,i,j,sxf,syf,ipflag,
534 &imin,&jmin,&imin8u,&jmin8u,&imin8l,&jmin8l,
535 &dmcfield,&dmc8,&sel,&sel8u,&sel8l,&imins,&jmins,&ds);
537 if (M==1 && !ipflag) /* generic condition which permits Dual Prime */
538 dpfield_estimate(topref,botref,mb,i,j,imins,jmins,&imindmv,&jmindmv,
541 /* select between dual prime, field and 16x8 prediction */
542 if (M==1 && !ipflag && dmc_dp<dmc8 && dmc_dp<dmcfield)
544 /* Dual Prime prediction */
545 mbi->motion_type = MC_DMV;
546 dmc = dmc_dp; /* L1 metric */
547 vmc = vmc_dp; /* we already calculated L2 error for Dual */
550 else if (dmc8<dmcfield)
552 /* 16x8 prediction */
553 mbi->motion_type = MC_16X8;
554 /* upper half block */
555 vmc = dist2((sel8u?botref:topref) + (imin8u>>1) + w2*(jmin8u>>1),
556 mb,w2,imin8u&1,jmin8u&1,8);
557 /* lower half block */
558 vmc+= dist2((sel8l?botref:topref) + (imin8l>>1) + w2*(jmin8l>>1),
559 mb+8*w2,w2,imin8l&1,jmin8l&1,8);
563 /* field prediction */
564 mbi->motion_type = MC_FIELD;
565 vmc = dist2((sel?botref:topref) + (imin>>1) + w2*(jmin>>1),
566 mb,w2,imin&1,jmin&1,16);
569 /* select between intra and non-intra coding */
570 if (vmc>var && vmc>=9*256)
571 mbi->mb_type = MB_INTRA;
574 /* zero MV field prediction from same parity ref. field
575 * (not allowed if ipflag is set)
578 v0 = dist2(((pict_struct==BOTTOM_FIELD)?botref:topref) + i + w2*j,
580 if (ipflag || (4*v0>5*vmc && v0>=9*256))
583 mbi->mb_type = MB_FORWARD;
584 if (mbi->motion_type==MC_FIELD)
586 mbi->MV[0][0][0] = imin - (i<<1);
587 mbi->MV[0][0][1] = jmin - (j<<1);
588 mbi->mv_field_sel[0][0] = sel;
590 else if (mbi->motion_type==MC_DMV)
592 /* same parity vector */
593 mbi->MV[0][0][0] = imins - (i<<1);
594 mbi->MV[0][0][1] = jmins - (j<<1);
596 /* opposite parity vector */
597 mbi->dmvector[0] = imindmv;
598 mbi->dmvector[1] = jmindmv;
602 mbi->MV[0][0][0] = imin8u - (i<<1);
603 mbi->MV[0][0][1] = jmin8u - (j<<1);
604 mbi->MV[1][0][0] = imin8l - (i<<1);
605 mbi->MV[1][0][1] = jmin8l - ((j+8)<<1);
606 mbi->mv_field_sel[0][0] = sel8u;
607 mbi->mv_field_sel[1][0] = sel8l;
615 mbi->motion_type = MC_FIELD;
616 mbi->MV[0][0][0] = 0;
617 mbi->MV[0][0][1] = 0;
618 mbi->mv_field_sel[0][0] = (pict_struct==BOTTOM_FIELD);
622 else /* if (pict_type==B_TYPE) */
624 /* forward prediction */
625 field_estimate(oldorg,oldref,oldorg+width,oldref+width,mb,
627 &iminf,&jminf,&imin8uf,&jmin8uf,&imin8lf,&jmin8lf,
628 &dmcfieldf,&dmc8f,&self,&sel8uf,&sel8lf,&imins,&jmins,&ds);
630 /* backward prediction */
631 field_estimate(neworg,newref,neworg+width,newref+width,mb,
633 &iminr,&jminr,&imin8ur,&jmin8ur,&imin8lr,&jmin8lr,
634 &dmcfieldr,&dmc8r,&selr,&sel8ur,&sel8lr,&imins,&jmins,&ds);
636 /* calculate distances for bidirectional prediction */
638 dmcfieldi = bdist1(oldref + (self?width:0) + (iminf>>1) + w2*(jminf>>1),
639 newref + (selr?width:0) + (iminr>>1) + w2*(jminr>>1),
640 mb,w2,iminf&1,jminf&1,iminr&1,jminr&1,16);
642 /* 16x8 upper half block */
643 dmc8i = bdist1(oldref + (sel8uf?width:0) + (imin8uf>>1) + w2*(jmin8uf>>1),
644 newref + (sel8ur?width:0) + (imin8ur>>1) + w2*(jmin8ur>>1),
645 mb,w2,imin8uf&1,jmin8uf&1,imin8ur&1,jmin8ur&1,8);
647 /* 16x8 lower half block */
648 dmc8i+= bdist1(oldref + (sel8lf?width:0) + (imin8lf>>1) + w2*(jmin8lf>>1),
649 newref + (sel8lr?width:0) + (imin8lr>>1) + w2*(jmin8lr>>1),
650 mb+8*w2,w2,imin8lf&1,jmin8lf&1,imin8lr&1,jmin8lr&1,8);
652 /* select prediction type of minimum distance */
653 if (dmcfieldi<dmc8i && dmcfieldi<dmcfieldf && dmcfieldi<dmc8f
654 && dmcfieldi<dmcfieldr && dmcfieldi<dmc8r)
656 /* field, interpolated */
657 mbi->mb_type = MB_FORWARD|MB_BACKWARD;
658 mbi->motion_type = MC_FIELD;
659 vmc = bdist2(oldref + (self?width:0) + (iminf>>1) + w2*(jminf>>1),
660 newref + (selr?width:0) + (iminr>>1) + w2*(jminr>>1),
661 mb,w2,iminf&1,jminf&1,iminr&1,jminr&1,16);
663 else if (dmc8i<dmcfieldf && dmc8i<dmc8f
664 && dmc8i<dmcfieldr && dmc8i<dmc8r)
666 /* 16x8, interpolated */
667 mbi->mb_type = MB_FORWARD|MB_BACKWARD;
668 mbi->motion_type = MC_16X8;
670 /* upper half block */
671 vmc = bdist2(oldref + (sel8uf?width:0) + (imin8uf>>1) + w2*(jmin8uf>>1),
672 newref + (sel8ur?width:0) + (imin8ur>>1) + w2*(jmin8ur>>1),
673 mb,w2,imin8uf&1,jmin8uf&1,imin8ur&1,jmin8ur&1,8);
675 /* lower half block */
676 vmc+= bdist2(oldref + (sel8lf?width:0) + (imin8lf>>1) + w2*(jmin8lf>>1),
677 newref + (sel8lr?width:0) + (imin8lr>>1) + w2*(jmin8lr>>1),
678 mb+8*w2,w2,imin8lf&1,jmin8lf&1,imin8lr&1,jmin8lr&1,8);
680 else if (dmcfieldf<dmc8f && dmcfieldf<dmcfieldr && dmcfieldf<dmc8r)
683 mbi->mb_type = MB_FORWARD;
684 mbi->motion_type = MC_FIELD;
685 vmc = dist2(oldref + (self?width:0) + (iminf>>1) + w2*(jminf>>1),
686 mb,w2,iminf&1,jminf&1,16);
688 else if (dmc8f<dmcfieldr && dmc8f<dmc8r)
691 mbi->mb_type = MB_FORWARD;
692 mbi->motion_type = MC_16X8;
694 /* upper half block */
695 vmc = dist2(oldref + (sel8uf?width:0) + (imin8uf>>1) + w2*(jmin8uf>>1),
696 mb,w2,imin8uf&1,jmin8uf&1,8);
698 /* lower half block */
699 vmc+= dist2(oldref + (sel8lf?width:0) + (imin8lf>>1) + w2*(jmin8lf>>1),
700 mb+8*w2,w2,imin8lf&1,jmin8lf&1,8);
702 else if (dmcfieldr<dmc8r)
704 /* field, backward */
705 mbi->mb_type = MB_BACKWARD;
706 mbi->motion_type = MC_FIELD;
707 vmc = dist2(newref + (selr?width:0) + (iminr>>1) + w2*(jminr>>1),
708 mb,w2,iminr&1,jminr&1,16);
713 mbi->mb_type = MB_BACKWARD;
714 mbi->motion_type = MC_16X8;
716 /* upper half block */
717 vmc = dist2(newref + (sel8ur?width:0) + (imin8ur>>1) + w2*(jmin8ur>>1),
718 mb,w2,imin8ur&1,jmin8ur&1,8);
720 /* lower half block */
721 vmc+= dist2(newref + (sel8lr?width:0) + (imin8lr>>1) + w2*(jmin8lr>>1),
722 mb+8*w2,w2,imin8lr&1,jmin8lr&1,8);
725 /* select between intra and non-intra coding */
726 if (vmc>var && vmc>=9*256)
727 mbi->mb_type = MB_INTRA;
731 if (mbi->motion_type==MC_FIELD)
734 mbi->MV[0][0][0] = iminf - (i<<1);
735 mbi->MV[0][0][1] = jminf - (j<<1);
736 mbi->mv_field_sel[0][0] = self;
738 mbi->MV[0][1][0] = iminr - (i<<1);
739 mbi->MV[0][1][1] = jminr - (j<<1);
740 mbi->mv_field_sel[0][1] = selr;
745 mbi->MV[0][0][0] = imin8uf - (i<<1);
746 mbi->MV[0][0][1] = jmin8uf - (j<<1);
747 mbi->mv_field_sel[0][0] = sel8uf;
748 mbi->MV[1][0][0] = imin8lf - (i<<1);
749 mbi->MV[1][0][1] = jmin8lf - ((j+8)<<1);
750 mbi->mv_field_sel[1][0] = sel8lf;
752 mbi->MV[0][1][0] = imin8ur - (i<<1);
753 mbi->MV[0][1][1] = jmin8ur - (j<<1);
754 mbi->mv_field_sel[0][1] = sel8ur;
755 mbi->MV[1][1][0] = imin8lr - (i<<1);
756 mbi->MV[1][1][1] = jmin8lr - ((j+8)<<1);
757 mbi->mv_field_sel[1][1] = sel8lr;
766 * frame picture motion estimation
768 * org: top left pel of source reference frame
769 * ref: top left pel of reconstructed reference frame
770 * mb: macroblock to be matched
771 * i,j: location of mb relative to ref (=center of search window)
772 * sx,sy: half widths of search window
773 * iminp,jminp,dframep: location and value of best frame prediction
774 * imintp,jmintp,tselp: location of best field pred. for top field of mb
775 * iminbp,jminbp,bselp: location of best field pred. for bottom field of mb
776 * dfieldp: value of field prediction
778 static void frame_estimate(org,ref,mb,i,j,sx,sy,
779 iminp,jminp,imintp,jmintp,iminbp,jminbp,dframep,dfieldp,tselp,bselp,
781 unsigned char *org,*ref,*mb;
784 int *imintp,*jmintp,*iminbp,*jminbp;
785 int *dframep,*dfieldp;
787 int imins[2][2],jmins[2][2];
789 int dt,db,dmint,dminb;
790 int imint,iminb,jmint,jminb;
792 /* frame prediction */
793 *dframep = fullsearch(org,ref,mb,width,i,j,sx,sy,16,width,height,
796 /* predict top field from top field */
797 dt = fullsearch(org,ref,mb,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
800 /* predict top field from bottom field */
801 db = fullsearch(org+width,ref+width,mb,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
809 /* select prediction for top field */
812 dmint=dt; *imintp=imint; *jmintp=jmint; *tselp=0;
816 dmint=db; *imintp=iminb; *jmintp=jminb; *tselp=1;
819 /* predict bottom field from top field */
820 dt = fullsearch(org,ref,mb+width,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
823 /* predict bottom field from bottom field */
824 db = fullsearch(org+width,ref+width,mb+width,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
832 /* select prediction for bottom field */
835 dminb=db; *iminbp=iminb; *jminbp=jminb; *bselp=1;
839 dminb=dt; *iminbp=imint; *jminbp=jmint; *bselp=0;
842 *dfieldp=dmint+dminb;
846 * field picture motion estimation subroutine
848 * toporg: address of original top reference field
849 * topref: address of reconstructed top reference field
850 * botorg: address of original bottom reference field
851 * botref: address of reconstructed bottom reference field
852 * mb: macroblock to be matched
853 * i,j: location of mb (=center of search window)
854 * sx,sy: half width/height of search window
856 * iminp,jminp,selp,dfieldp: location and distance of best field prediction
857 * imin8up,jmin8up,sel8up: location of best 16x8 pred. for upper half of mb
858 * imin8lp,jmin8lp,sel8lp: location of best 16x8 pred. for lower half of mb
859 * d8p: distance of best 16x8 prediction
860 * iminsp,jminsp,dsp: location and distance of best same parity field
861 * prediction (needed for dual prime, only valid if
864 static void field_estimate(toporg,topref,botorg,botref,mb,i,j,sx,sy,ipflag,
865 iminp,jminp,imin8up,jmin8up,imin8lp,jmin8lp,dfieldp,d8p,selp,sel8up,sel8lp,
867 unsigned char *toporg, *topref, *botorg, *botref, *mb;
871 int *imin8up, *jmin8up, *imin8lp, *jmin8lp;
873 int *selp, *sel8up, *sel8lp;
874 int *iminsp, *jminsp, *dsp;
876 int dt, db, imint, jmint, iminb, jminb, notop, nobot;
878 /* if ipflag is set, predict from field of opposite parity only */
879 notop = ipflag && (pict_struct==TOP_FIELD);
880 nobot = ipflag && (pict_struct==BOTTOM_FIELD);
882 /* field prediction */
884 /* predict current field from top field */
886 dt = 65536; /* infinity */
888 dt = fullsearch(toporg,topref,mb,width<<1,
889 i,j,sx,sy>>1,16,width,height>>1,
892 /* predict current field from bottom field */
894 db = 65536; /* infinity */
896 db = fullsearch(botorg,botref,mb,width<<1,
897 i,j,sx,sy>>1,16,width,height>>1,
900 /* same parity prediction (only valid if ipflag==0) */
901 if (pict_struct==TOP_FIELD)
903 *iminsp = imint; *jminsp = jmint; *dsp = dt;
907 *iminsp = iminb; *jminsp = jminb; *dsp = db;
910 /* select field prediction */
913 *dfieldp = dt; *iminp = imint; *jminp = jmint; *selp = 0;
917 *dfieldp = db; *iminp = iminb; *jminp = jminb; *selp = 1;
921 /* 16x8 motion compensation */
923 /* predict upper half field from top field */
927 dt = fullsearch(toporg,topref,mb,width<<1,
928 i,j,sx,sy>>1,8,width,height>>1,
931 /* predict upper half field from bottom field */
935 db = fullsearch(botorg,botref,mb,width<<1,
936 i,j,sx,sy>>1,8,width,height>>1,
939 /* select prediction for upper half field */
942 *d8p = dt; *imin8up = imint; *jmin8up = jmint; *sel8up = 0;
946 *d8p = db; *imin8up = iminb; *jmin8up = jminb; *sel8up = 1;
949 /* predict lower half field from top field */
953 dt = fullsearch(toporg,topref,mb+(width<<4),width<<1,
954 i,j+8,sx,sy>>1,8,width,height>>1,
957 /* predict lower half field from bottom field */
961 db = fullsearch(botorg,botref,mb+(width<<4),width<<1,
962 i,j+8,sx,sy>>1,8,width,height>>1,
965 /* select prediction for lower half field */
968 *d8p += dt; *imin8lp = imint; *jmin8lp = jmint; *sel8lp = 0;
972 *d8p += db; *imin8lp = iminb; *jmin8lp = jminb; *sel8lp = 1;
976 static void dpframe_estimate(ref,mb,i,j,iminf,jminf,
977 iminp,jminp,imindmvp, jmindmvp, dmcp, vmcp)
978 unsigned char *ref, *mb;
980 int iminf[2][2], jminf[2][2];
982 int *imindmvp, *jmindmvp;
985 int pref,ppred,delta_x,delta_y;
986 int is,js,it,jt,ib,jb,it0,jt0,ib0,jb0;
987 int imins,jmins,imint,jmint,iminb,jminb,imindmv,jmindmv;
990 /* Calculate Dual Prime distortions for 9 delta candidates
991 * for each of the four minimum field vectors
992 * Note: only for P pictures!
995 /* initialize minimum dual prime distortion to large value */
998 for (pref=0; pref<2; pref++)
1000 for (ppred=0; ppred<2; ppred++)
1002 /* convert Cartesian absolute to relative motion vector
1003 * values (wrt current macroblock address (i,j)
1005 is = iminf[pref][ppred] - (i<<1);
1006 js = jminf[pref][ppred] - (j<<1);
1010 /* vertical field shift adjustment */
1016 /* mvxs and mvys scaling*/
1019 if (topfirst == ppred)
1021 /* second field: scale by 1/3 */
1022 is = (is>=0) ? (is+1)/3 : -((-is+1)/3);
1023 js = (js>=0) ? (js+1)/3 : -((-js+1)/3);
1029 /* vector for prediction from field of opposite 'parity' */
1032 /* vector for prediction of top field from bottom field */
1033 it0 = ((is+(is>0))>>1);
1034 jt0 = ((js+(js>0))>>1) - 1;
1036 /* vector for prediction of bottom field from top field */
1037 ib0 = ((3*is+(is>0))>>1);
1038 jb0 = ((3*js+(js>0))>>1) + 1;
1042 /* vector for prediction of top field from bottom field */
1043 it0 = ((3*is+(is>0))>>1);
1044 jt0 = ((3*js+(js>0))>>1) - 1;
1046 /* vector for prediction of bottom field from top field */
1047 ib0 = ((is+(is>0))>>1);
1048 jb0 = ((js+(js>0))>>1) + 1;
1051 /* convert back to absolute half-pel field picture coordinates */
1059 if (is >= 0 && is <= (width-16)<<1 &&
1060 js >= 0 && js <= (height-16))
1062 for (delta_y=-1; delta_y<=1; delta_y++)
1064 for (delta_x=-1; delta_x<=1; delta_x++)
1066 /* opposite field coordinates */
1072 if (it >= 0 && it <= (width-16)<<1 &&
1073 jt >= 0 && jt <= (height-16) &&
1074 ib >= 0 && ib <= (width-16)<<1 &&
1075 jb >= 0 && jb <= (height-16))
1077 /* compute prediction error */
1078 local_dist = bdist2(
1079 ref + (is>>1) + (width<<1)*(js>>1),
1080 ref + width + (it>>1) + (width<<1)*(jt>>1),
1081 mb, /* current mb location */
1082 width<<1, /* adjacent line distance */
1083 is&1, js&1, it&1, jt&1, /* half-pel flags */
1084 8); /* block height */
1085 local_dist += bdist2(
1086 ref + width + (is>>1) + (width<<1)*(js>>1),
1087 ref + (ib>>1) + (width<<1)*(jb>>1),
1088 mb + width, /* current mb location */
1089 width<<1, /* adjacent line distance */
1090 is&1, js&1, ib&1, jb&1, /* half-pel flags */
1091 8); /* block height */
1093 /* update delta with least distortion vector */
1094 if (local_dist < vmc)
1107 } /* end delta x loop */
1108 } /* end delta y loop */
1113 /* Compute L1 error for decision purposes */
1114 local_dist = bdist1(
1115 ref + (imins>>1) + (width<<1)*(jmins>>1),
1116 ref + width + (imint>>1) + (width<<1)*(jmint>>1),
1119 imins&1, jmins&1, imint&1, jmint&1,
1121 local_dist += bdist1(
1122 ref + width + (imins>>1) + (width<<1)*(jmins>>1),
1123 ref + (iminb>>1) + (width<<1)*(jminb>>1),
1126 imins&1, jmins&1, iminb&1, jminb&1,
1132 *imindmvp = imindmv;
1133 *jmindmvp = jmindmv;
1137 static void dpfield_estimate(topref,botref,mb,i,j,imins,jmins,
1138 imindmvp, jmindmvp, dmcp, vmcp)
1139 unsigned char *topref, *botref, *mb;
1142 int *imindmvp, *jmindmvp;
1145 unsigned char *sameref, *oppref;
1146 int io0,jo0,io,jo,delta_x,delta_y,mvxs,mvys,mvxo0,mvyo0;
1147 int imino,jmino,imindmv,jmindmv,vmc_dp,local_dist;
1149 /* Calculate Dual Prime distortions for 9 delta candidates */
1150 /* Note: only for P pictures! */
1152 /* Assign opposite and same reference pointer */
1153 if (pict_struct==TOP_FIELD)
1164 /* convert Cartesian absolute to relative motion vector
1165 * values (wrt current macroblock address (i,j)
1167 mvxs = imins - (i<<1);
1168 mvys = jmins - (j<<1);
1170 /* vector for prediction from field of opposite 'parity' */
1171 mvxo0 = (mvxs+(mvxs>0)) >> 1; /* mvxs // 2 */
1172 mvyo0 = (mvys+(mvys>0)) >> 1; /* mvys // 2 */
1174 /* vertical field shift correction */
1175 if (pict_struct==TOP_FIELD)
1180 /* convert back to absolute coordinates */
1181 io0 = mvxo0 + (i<<1);
1182 jo0 = mvyo0 + (j<<1);
1184 /* initialize minimum dual prime distortion to large value */
1187 for (delta_y = -1; delta_y <= 1; delta_y++)
1189 for (delta_x = -1; delta_x <=1; delta_x++)
1191 /* opposite field coordinates */
1195 if (io >= 0 && io <= (width-16)<<1 &&
1196 jo >= 0 && jo <= (height2-16)<<1)
1198 /* compute prediction error */
1199 local_dist = bdist2(
1200 sameref + (imins>>1) + width2*(jmins>>1),
1201 oppref + (io>>1) + width2*(jo>>1),
1202 mb, /* current mb location */
1203 width2, /* adjacent line distance */
1204 imins&1, jmins&1, io&1, jo&1, /* half-pel flags */
1205 16); /* block height */
1207 /* update delta with least distortion vector */
1208 if (local_dist < vmc_dp)
1214 vmc_dp = local_dist;
1217 } /* end delta x loop */
1218 } /* end delta y loop */
1220 /* Compute L1 error for decision purposes */
1222 sameref + (imins>>1) + width2*(jmins>>1),
1223 oppref + (imino>>1) + width2*(jmino>>1),
1224 mb, /* current mb location */
1225 width2, /* adjacent line distance */
1226 imins&1, jmins&1, imino&1, jmino&1, /* half-pel flags */
1227 16); /* block height */
1229 *imindmvp = imindmv;
1230 *jmindmvp = jmindmv;
1235 * full search block matching
1237 * blk: top left pel of (16*h) block
1238 * h: height of block
1239 * lx: distance (in bytes) of vertically adjacent pels in ref,blk
1240 * org: top left pel of source reference picture
1241 * ref: top left pel of reconstructed reference picture
1242 * i0,j0: center of search window
1243 * sx,sy: half widths of search window
1244 * xmax,ymax: right/bottom limits of search area
1245 * iminp,jminp: pointers to where the result is stored
1246 * result is given as half pel offset from ref(0,0)
1247 * i.e. NOT relative to (i0,j0)
1249 static int fullsearch(org,ref,blk,lx,i0,j0,sx,sy,h,xmax,ymax,iminp,jminp)
1250 unsigned char *org,*ref,*blk;
1251 int lx,i0,j0,sx,sy,h,xmax,ymax;
1254 int i,j,imin,jmin,ilow,ihigh,jlow,jhigh;
1276 /* full pel search, spiraling outwards */
1280 dmin = dist1(org + imin + lx * jmin, blk, lx, 0, 0, h, 65536);
1282 sxy = (sx>sy) ? sx : sy;
1284 for (l = 1; l <= sxy; l++)
1288 for (k=0; k<8*l; k++)
1290 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1292 d = dist1(org + i + lx * j, blk, lx, 0, 0, h, dmin);
1303 else if (k<4*l) j++;
1304 else if (k<6*l) i--;
1313 ilow = imin - (imin>0);
1314 ihigh = imin + (imin<((xmax-16)<<1));
1315 jlow = jmin - (jmin>0);
1316 jhigh = jmin + (jmin<((ymax-h)<<1));
1318 for (j=jlow; j<=jhigh; j++)
1319 for (i=ilow; i<=ihigh; i++)
1321 d = dist1(ref+(i>>1)+lx*(j>>1),blk,lx,i&1,j&1,h,dmin);
1338 * total absolute difference between two (16*h) blocks
1339 * including optional half pel interpolation of blk1 (hx,hy)
1340 * blk1,blk2: addresses of top left pels of both blocks
1341 * lx: distance (in bytes) of vertically adjacent pels
1342 * hx,hy: flags for horizontal and/or vertical interpolation
1343 * h: height of block (usually 8 or 16)
1344 * distlim: bail out if sum exceeds this value
1348 void inline mmx_start_block()
1357 void inline mmx_absdiff(unsigned char *a, unsigned char *b)
1361 movq (%%ebx), %%mm0; // Get first half of row1
1362 movq (%%ecx), %%mm1; // Get first half of row2
1363 movq %%mm0, %%mm2; // Make a copy of row1 for absdiff operation
1364 movq 8(%%ebx), %%mm3; // Get second half of row1
1365 psubusb %%mm1, %%mm0; // Subtract the first halves one way
1366 psubusb %%mm2, %%mm1; // Subtract the other way
1367 movq 8(%%ecx), %%mm4; // Get second half of row2
1368 por %%mm1, %%mm0; // Merge first half results
1369 movq %%mm3, %%mm5; // Copy for absdiff operation
1370 movq %%mm0, %%mm1; // Keep a copy
1371 psubusb %%mm4, %%mm3; // Subtract second halves one way
1372 punpcklbw %%mm6, %%mm0; // Unpack to higher precision for accumulation
1373 psubusb %%mm5, %%mm4; // Subtract the other way
1374 psrlq $32, %%mm1; // Shift registeres for accumulation
1375 por %%mm4, %%mm3; // merge results of 2nd half
1376 punpcklbw %%mm6, %%mm1; // unpack to higher precision for accumulation
1377 movq %%mm3, %%mm4; // keep a copy
1378 punpcklbw %%mm6, %%mm3; // unpack to higher precision for accumulation
1379 paddw %%mm0, %%mm7; // accumulate difference
1380 psrlq $32, %%mm4; // shift results for accumulation
1381 paddw %%mm1, %%mm7; // accumulate difference
1382 punpcklbw %%mm6, %%mm4; // unpack to higher precision for accumulation
1383 paddw %%mm3, %%mm7; // accumulate difference
1384 paddw %%mm4, %%mm7; // accumulate difference
1391 unsigned int mmx_accum_absdiff()
1393 unsigned long long r = 0;
1398 punpcklwd %%mm6, %%mm4;
1399 punpckhwd %%mm6, %%mm5;
1402 punpckldq %%mm6, %%mm5;
1403 punpckhdq %%mm6, %%mm4;
1405 movq %%mm4, (%%ebx);
1413 static unsigned long MMX_AVGDIFF_1[] = {0x00010001, 0x00010001};
1416 void inline mmx_avgdiff(unsigned char *p1, unsigned char *p2, unsigned char *p3)
1420 movq (%%ebx), %%mm0; // Load 8 pixels from a
1421 pxor %%mm4, %%mm4; // Zero out temp for unpacking a
1422 movq %%mm0, %%mm2; // Make a copy of a for unpacking
1423 movq (%%ecx), %%mm1; // Load 8 pixels from b
1424 pxor %%mm3, %%mm3; // Zero out b's upper unpacked destination
1425 punpcklbw %%mm4, %%mm2; // Unpack lower 4 pixels from a for addition
1426 movq %%mm1, %%mm5; // Copy b for unpacking
1427 punpckhbw %%mm4, %%mm0; // Unpack upper 4 pixels from a for addition
1428 punpcklbw %%mm3, %%mm5; // Unpack lower 4 pixels from b for addition
1429 paddw %%mm2, %%mm5; // Add lower a and lower b unpacked
1430 punpckhbw %%mm3, %%mm1; // Unpack upper 4 pixels from b for addition
1431 paddw %%mm0, %%mm1; // Add upper a and upper b unpacked
1432 movq (%%edx), %%mm2; // Load c for difference
1433 paddw MMX_AVGDIFF_1, %%mm5; // Add 1 to the result of lower a + b
1434 pxor %%mm4, %%mm4; // Zero out temp for c unpacking
1435 movq %%mm2, %%mm3; // Make a copy of c for unpacking
1436 paddw MMX_AVGDIFF_1, %%mm1; // Add 1 to the result of upper a + b
1437 punpcklbw %%mm4, %%mm3; // Unpack lower 4 pixels from c for subtraction
1438 punpckhbw %%mm4, %%mm2; // Unpack upper 4 pixels from c
1439 movq %%mm3, %%mm0; // Make a copy of lower c for absdiff
1440 psraw $1, %%mm5; // Divide result of lower a + b by 2
1441 movq %%mm2, %%mm4; // Make a copy of upper c for absdiff
1442 psraw $1, %%mm1; // Divide result of upper a + b by 2
1443 psubusw %%mm5, %%mm3; // Subtract lower pixels one way
1444 psubusw %%mm1, %%mm2; // Subtract upper pixels one way
1445 psubusw %%mm0, %%mm5; // Subtract lower pixels the other way
1446 por %%mm5, %%mm3; // Or the result of the lower pixels
1447 psubusw %%mm4, %%mm1; // Subtract upper pixels the other way
1448 por %%mm1, %%mm2; // Or the result of the upper pixels
1449 paddw %%mm3, %%mm7; // Accumulate lower pixels
1450 paddw %%mm2, %%mm7; // Accumulate upper pixels
1451 movq %%mm3, (%%edx);
1454 : "b" (p1), "c" (p2), "d" (p3));
1457 static unsigned long MMX_ACCUM_AND[] = {0xffffffff, 0x00000000};
1459 unsigned int mmx_accum_avgdiff()
1461 unsigned long long r = 0;
1464 pxor %%mm5, %%mm5; // Clear temp for unpacking
1465 movq %%mm7, %%mm6; // Make a copy for unpacking
1466 punpcklwd %%mm5, %%mm6; // Unpack lower 2 pixels for accumulation
1467 punpckhwd %%mm5, %%mm7; // Unpack high 2 pixels for accumulation
1468 paddw %%mm6, %%mm7; // Add 2 doublewords in each register
1469 movq %%mm7, %%mm6; // Copy the result for a final add
1470 pand MMX_ACCUM_AND, %%mm7; // And the result for accumulation
1471 psrlq $32, %%mm6; // Shift the copy right for accumulation
1472 paddd %%mm6, %%mm7; // Add the results
1473 movq %%mm7, (%%ebx); // Store result
1478 return (unsigned int)r;
1481 // avgdiff only gave a 2 second improvement and since it didn't work anyway, it was
1484 static int dist1(blk1, blk2, lx, hx, hy, h, distlim)
1485 unsigned char *blk1,*blk2;
1489 unsigned char *p1, *p1a, *p2;
1502 for(j = 0; j < h; j++)
1505 mmx_absdiff(p1, p2);
1507 if((v = p1[0] - p2[0]) < 0) v = -v; s += v;
1508 if((v = p1[1] - p2[1]) < 0) v = -v; s += v;
1509 if((v = p1[2] - p2[2]) < 0) v = -v; s += v;
1510 if((v = p1[3] - p2[3]) < 0) v = -v; s += v;
1511 if((v = p1[4] - p2[4]) < 0) v = -v; s += v;
1512 if((v = p1[5] - p2[5]) < 0) v = -v; s += v;
1513 if((v = p1[6] - p2[6]) < 0) v = -v; s += v;
1514 if((v = p1[7] - p2[7]) < 0) v = -v; s += v;
1515 if((v = p1[8] - p2[8]) < 0) v = -v; s += v;
1516 if((v = p1[9] - p2[9]) < 0) v = -v; s += v;
1517 if((v = p1[10] - p2[10]) < 0) v = -v; s += v;
1518 if((v = p1[11] - p2[11]) < 0) v = -v; s += v;
1519 if((v = p1[12] - p2[12]) < 0) v = -v; s += v;
1520 if((v = p1[13] - p2[13]) < 0) v = -v; s += v;
1521 if((v = p1[14] - p2[14]) < 0) v = -v; s += v;
1522 if((v = p1[15] - p2[15]) < 0) v = -v; s += v;
1523 if(s >= distlim) break;
1530 s = mmx_accum_absdiff();
1537 // mmx_start_block();
1539 for(j = 0; j < h; j++)
1542 // mmx_avgdiff(p1, &p1[1], p2);
1543 // mmx_avgdiff(&p1[8], &p1[9], &p2[8]);
1545 v = ((unsigned int)(p1[0] + p1[1] + 1) >> 1) - p2[0]; if(v < 0) s -= v; else s += v;
1546 v = ((unsigned int)(p1[1] + p1[2] + 1) >> 1) - p2[1]; if(v < 0) s -= v; else s += v;
1547 v = ((unsigned int)(p1[2] + p1[3] + 1) >> 1) - p2[2]; if(v < 0) s -= v; else s += v;
1548 v = ((unsigned int)(p1[3] + p1[4] + 1) >> 1) - p2[3]; if(v < 0) s -= v; else s += v;
1549 v = ((unsigned int)(p1[4] + p1[5] + 1) >> 1) - p2[4]; if(v < 0) s -= v; else s += v;
1550 v = ((unsigned int)(p1[5] + p1[6] + 1) >> 1) - p2[5]; if(v < 0) s -= v; else s += v;
1551 v = ((unsigned int)(p1[6] + p1[7] + 1) >> 1) - p2[6]; if(v < 0) s -= v; else s += v;
1552 v = ((unsigned int)(p1[7] + p1[8] + 1) >> 1) - p2[7]; if(v < 0) s -= v; else s += v;
1553 v = ((unsigned int)(p1[8] + p1[9] + 1) >> 1) - p2[8]; if(v < 0) s -= v; else s += v;
1554 v = ((unsigned int)(p1[9] + p1[10] + 1) >> 1) - p2[9]; if(v < 0) s -= v; else s += v;
1555 v = ((unsigned int)(p1[10] + p1[11] + 1) >> 1) - p2[10]; if(v < 0) s -= v; else s += v;
1556 v = ((unsigned int)(p1[11] + p1[12] + 1) >> 1) - p2[11]; if(v < 0) s -= v; else s += v;
1557 v = ((unsigned int)(p1[12] + p1[13] + 1) >> 1) - p2[12]; if(v < 0) s -= v; else s += v;
1558 v = ((unsigned int)(p1[13] + p1[14] + 1) >> 1) - p2[13]; if(v < 0) s -= v; else s += v;
1559 v = ((unsigned int)(p1[14] + p1[15] + 1) >> 1) - p2[14]; if(v < 0) s -= v; else s += v;
1560 v = ((unsigned int)(p1[15] + p1[16] + 1) >> 1) - p2[15]; if(v < 0) s -= v; else s += v;
1567 // s = mmx_accum_avgdiff();
1573 // mmx_start_block();
1576 for(j = 0; j < h; j++)
1579 // mmx_avgdiff(p1, p1a, p2);
1580 // mmx_avgdiff(&p1[8], &p1a[8], &p2[8]);
1582 v = ((unsigned int)(p1[0] + p1a[0] + 1) >> 1) - p2[0]; if(v < 0) s -= v; else s += v;
1583 v = ((unsigned int)(p1[1] + p1a[1] + 1) >> 1) - p2[1]; if(v < 0) s -= v; else s += v;
1584 v = ((unsigned int)(p1[2] + p1a[2] + 1) >> 1) - p2[2]; if(v < 0) s -= v; else s += v;
1585 v = ((unsigned int)(p1[3] + p1a[3] + 1) >> 1) - p2[3]; if(v < 0) s -= v; else s += v;
1586 v = ((unsigned int)(p1[4] + p1a[4] + 1) >> 1) - p2[4]; if(v < 0) s -= v; else s += v;
1587 v = ((unsigned int)(p1[5] + p1a[5] + 1) >> 1) - p2[5]; if(v < 0) s -= v; else s += v;
1588 v = ((unsigned int)(p1[6] + p1a[6] + 1) >> 1) - p2[6]; if(v < 0) s -= v; else s += v;
1589 v = ((unsigned int)(p1[7] + p1a[7] + 1) >> 1) - p2[7]; if(v < 0) s -= v; else s += v;
1590 v = ((unsigned int)(p1[8] + p1a[8] + 1) >> 1) - p2[8]; if(v < 0) s -= v; else s += v;
1591 v = ((unsigned int)(p1[9] + p1a[9] + 1) >> 1) - p2[9]; if(v < 0) s -= v; else s += v;
1592 v = ((unsigned int)(p1[10] + p1a[10] + 1) >> 1) - p2[10]; if(v < 0) s -= v; else s += v;
1593 v = ((unsigned int)(p1[11] + p1a[11] + 1) >> 1) - p2[11]; if(v < 0) s -= v; else s += v;
1594 v = ((unsigned int)(p1[12] + p1a[12] + 1) >> 1) - p2[12]; if(v < 0) s -= v; else s += v;
1595 v = ((unsigned int)(p1[13] + p1a[13] + 1) >> 1) - p2[13]; if(v < 0) s -= v; else s += v;
1596 v = ((unsigned int)(p1[14] + p1a[14] + 1) >> 1) - p2[14]; if(v < 0) s -= v; else s += v;
1597 v = ((unsigned int)(p1[15] + p1a[15] + 1) >> 1) - p2[15]; if(v < 0) s -= v; else s += v;
1605 // s = mmx_accum_avgdiff();
1608 else /* if (hx && hy) */
1611 for(j = 0; j < h; j++)
1613 v = ((unsigned int)(p1[0] + p1[1] + p1a[0] + p1a[1] + 2) >> 2) - p2[0]; if(v < 0) s -= v; else s += v;
1614 v = ((unsigned int)(p1[1] + p1[2] + p1a[1] + p1a[3] + 2) >> 2) - p2[1]; if(v < 0) s -= v; else s += v;
1615 v = ((unsigned int)(p1[2] + p1[3] + p1a[2] + p1a[3] + 2) >> 2) - p2[2]; if(v < 0) s -= v; else s += v;
1616 v = ((unsigned int)(p1[3] + p1[4] + p1a[3] + p1a[4] + 2) >> 2) - p2[3]; if(v < 0) s -= v; else s += v;
1617 v = ((unsigned int)(p1[4] + p1[5] + p1a[4] + p1a[5] + 2) >> 2) - p2[4]; if(v < 0) s -= v; else s += v;
1618 v = ((unsigned int)(p1[5] + p1[6] + p1a[5] + p1a[6] + 2) >> 2) - p2[5]; if(v < 0) s -= v; else s += v;
1619 v = ((unsigned int)(p1[6] + p1[7] + p1a[6] + p1a[7] + 2) >> 2) - p2[6]; if(v < 0) s -= v; else s += v;
1620 v = ((unsigned int)(p1[7] + p1[8] + p1a[7] + p1a[8] + 2) >> 2) - p2[7]; if(v < 0) s -= v; else s += v;
1621 v = ((unsigned int)(p1[8] + p1[9] + p1a[8] + p1a[9] + 2) >> 2) - p2[8]; if(v < 0) s -= v; else s += v;
1622 v = ((unsigned int)(p1[9] + p1[10] + p1a[9] + p1a[10] + 2) >> 2) - p2[9]; if(v < 0) s -= v; else s += v;
1623 v = ((unsigned int)(p1[10] + p1[11] + p1a[10] + p1a[11] + 2) >> 2) - p2[10]; if(v < 0) s -= v; else s += v;
1624 v = ((unsigned int)(p1[11] + p1[12] + p1a[11] + p1a[12] + 2) >> 2) - p2[11]; if(v < 0) s -= v; else s += v;
1625 v = ((unsigned int)(p1[12] + p1[13] + p1a[12] + p1a[13] + 2) >> 2) - p2[12]; if(v < 0) s -= v; else s += v;
1626 v = ((unsigned int)(p1[13] + p1[14] + p1a[13] + p1a[14] + 2) >> 2) - p2[13]; if(v < 0) s -= v; else s += v;
1627 v = ((unsigned int)(p1[14] + p1[15] + p1a[14] + p1a[15] + 2) >> 2) - p2[14]; if(v < 0) s -= v; else s += v;
1628 v = ((unsigned int)(p1[15] + p1[16] + p1a[15] + p1a[16] + 2) >> 2) - p2[15]; if(v < 0) s -= v; else s += v;
1638 * total squared difference between two (16*h) blocks
1639 * including optional half pel interpolation of blk1 (hx,hy)
1640 * blk1,blk2: addresses of top left pels of both blocks
1641 * lx: distance (in bytes) of vertically adjacent pels
1642 * hx,hy: flags for horizontal and/or vertical interpolation
1643 * h: height of block (usually 8 or 16)
1645 static int dist2(blk1, blk2, lx, hx, hy, h)
1646 unsigned char *blk1,*blk2;
1649 register unsigned char *p1,*p1a,*p2;
1658 for(j = 0; j < h; j++)
1660 v = p1[0] - p2[0]; s += v * v;
1661 v = p1[1] - p2[1]; s += v * v;
1662 v = p1[2] - p2[2]; s += v * v;
1663 v = p1[3] - p2[3]; s += v * v;
1664 v = p1[4] - p2[4]; s += v * v;
1665 v = p1[5] - p2[5]; s += v * v;
1666 v = p1[6] - p2[6]; s += v * v;
1667 v = p1[7] - p2[7]; s += v * v;
1668 v = p1[8] - p2[8]; s += v * v;
1669 v = p1[9] - p2[9]; s += v * v;
1670 v = p1[10] - p2[10]; s += v * v;
1671 v = p1[11] - p2[11]; s += v * v;
1672 v = p1[12] - p2[12]; s += v * v;
1673 v = p1[13] - p2[13]; s += v * v;
1674 v = p1[14] - p2[14]; s += v * v;
1675 v = p1[15] - p2[15]; s += v * v;
1683 for (j = 0; j < h; j++)
1685 v = ((unsigned int)(p1[0] + p1[1] + 1) >> 1) - p2[0]; s += v * v;
1686 v = ((unsigned int)(p1[1] + p1[2] + 1) >> 1) - p2[1]; s += v * v;
1687 v = ((unsigned int)(p1[2] + p1[3] + 1) >> 1) - p2[2]; s += v * v;
1688 v = ((unsigned int)(p1[3] + p1[4] + 1) >> 1) - p2[3]; s += v * v;
1689 v = ((unsigned int)(p1[4] + p1[5] + 1) >> 1) - p2[4]; s += v * v;
1690 v = ((unsigned int)(p1[5] + p1[6] + 1) >> 1) - p2[5]; s += v * v;
1691 v = ((unsigned int)(p1[6] + p1[7] + 1) >> 1) - p2[6]; s += v * v;
1692 v = ((unsigned int)(p1[7] + p1[8] + 1) >> 1) - p2[7]; s += v * v;
1693 v = ((unsigned int)(p1[8] + p1[9] + 1) >> 1) - p2[8]; s += v * v;
1694 v = ((unsigned int)(p1[9] + p1[10] + 1) >> 1) - p2[9]; s += v * v;
1695 v = ((unsigned int)(p1[10]+ p1[11] + 1) >> 1) - p2[10]; s += v * v;
1696 v = ((unsigned int)(p1[11]+ p1[12] + 1) >> 1) - p2[11]; s += v * v;
1697 v = ((unsigned int)(p1[12]+ p1[13] + 1) >> 1) - p2[12]; s += v * v;
1698 v = ((unsigned int)(p1[13]+ p1[14] + 1) >> 1) - p2[13]; s += v * v;
1699 v = ((unsigned int)(p1[14]+ p1[15] + 1) >> 1) - p2[14]; s += v * v;
1700 v = ((unsigned int)(p1[15]+ p1[16] + 1) >> 1) - p2[15]; s += v * v;
1709 for(j = 0; j < h; j++)
1711 v = ((unsigned int)(p1[0] + p1a[0] + 1) >> 1) - p2[0]; s += v * v;
1712 v = ((unsigned int)(p1[1] + p1a[1] + 1) >> 1) - p2[1]; s += v * v;
1713 v = ((unsigned int)(p1[2] + p1a[2] + 1) >> 1) - p2[2]; s += v * v;
1714 v = ((unsigned int)(p1[3] + p1a[3] + 1) >> 1) - p2[3]; s += v * v;
1715 v = ((unsigned int)(p1[4] + p1a[4] + 1) >> 1) - p2[4]; s += v * v;
1716 v = ((unsigned int)(p1[5] + p1a[5] + 1) >> 1) - p2[5]; s += v * v;
1717 v = ((unsigned int)(p1[6] + p1a[6] + 1) >> 1) - p2[6]; s += v * v;
1718 v = ((unsigned int)(p1[7] + p1a[7] + 1) >> 1) - p2[7]; s += v * v;
1719 v = ((unsigned int)(p1[8] + p1a[8] + 1) >> 1) - p2[8]; s += v * v;
1720 v = ((unsigned int)(p1[9] + p1a[9] + 1) >> 1) - p2[9]; s += v * v;
1721 v = ((unsigned int)(p1[10]+ p1a[10]+ 1) >> 1) - p2[10]; s += v * v;
1722 v = ((unsigned int)(p1[11]+ p1a[11]+ 1) >> 1) - p2[11]; s += v * v;
1723 v = ((unsigned int)(p1[12]+ p1a[12]+ 1) >> 1) - p2[12]; s += v * v;
1724 v = ((unsigned int)(p1[13]+ p1a[13]+ 1) >> 1) - p2[13]; s += v * v;
1725 v = ((unsigned int)(p1[14]+ p1a[14]+ 1) >> 1) - p2[14]; s += v * v;
1726 v = ((unsigned int)(p1[15]+ p1a[15]+ 1) >> 1) - p2[15]; s += v * v;
1732 else /* if (hx && hy) */
1735 for(j = 0; j < h; j++)
1737 v = ((unsigned int)(p1[0] + p1[1] + p1a[0] + p1a[1] + 2) >> 2) - p2[0]; s += v * v;
1738 v = ((unsigned int)(p1[1] + p1[2] + p1a[1] + p1a[2] + 2) >> 2) - p2[1]; s += v * v;
1739 v = ((unsigned int)(p1[2] + p1[3] + p1a[2] + p1a[3] + 2) >> 2) - p2[2]; s += v * v;
1740 v = ((unsigned int)(p1[3] + p1[4] + p1a[3] + p1a[4] + 2) >> 2) - p2[3]; s += v * v;
1741 v = ((unsigned int)(p1[4] + p1[5] + p1a[4] + p1a[5] + 2) >> 2) - p2[4]; s += v * v;
1742 v = ((unsigned int)(p1[5] + p1[6] + p1a[5] + p1a[6] + 2) >> 2) - p2[5]; s += v * v;
1743 v = ((unsigned int)(p1[6] + p1[7] + p1a[6] + p1a[7] + 2) >> 2) - p2[6]; s += v * v;
1744 v = ((unsigned int)(p1[7] + p1[8] + p1a[7] + p1a[8] + 2) >> 2) - p2[7]; s += v * v;
1745 v = ((unsigned int)(p1[8] + p1[9] + p1a[8] + p1a[9] + 2) >> 2) - p2[8]; s += v * v;
1746 v = ((unsigned int)(p1[9] + p1[10] + p1a[9] + p1a[10] + 2) >> 2) - p2[9]; s += v * v;
1747 v = ((unsigned int)(p1[10] + p1[11] + p1a[10] + p1a[11] + 2) >> 2) - p2[10]; s += v * v;
1748 v = ((unsigned int)(p1[11] + p1[12] + p1a[11] + p1a[12] + 2) >> 2) - p2[11]; s += v * v;
1749 v = ((unsigned int)(p1[12] + p1[13] + p1a[12] + p1a[13] + 2) >> 2) - p2[12]; s += v * v;
1750 v = ((unsigned int)(p1[13] + p1[14] + p1a[13] + p1a[14] + 2) >> 2) - p2[13]; s += v * v;
1751 v = ((unsigned int)(p1[14] + p1[15] + p1a[14] + p1a[15] + 2) >> 2) - p2[14]; s += v * v;
1752 v = ((unsigned int)(p1[15] + p1[16] + p1a[15] + p1a[16] + 2) >> 2) - p2[15]; s += v * v;
1763 * absolute difference error between a (16*h) block and a bidirectional
1766 * p2: address of top left pel of block
1767 * pf,hxf,hyf: address and half pel flags of forward ref. block
1768 * pb,hxb,hyb: address and half pel flags of backward ref. block
1769 * h: height of block
1770 * lx: distance (in bytes) of vertically adjacent pels in p2,pf,pb
1772 static int bdist1(pf,pb,p2,lx,hxf,hyf,hxb,hyb,h)
1773 unsigned char *pf,*pb,*p2;
1774 int lx,hxf,hyf,hxb,hyb,h;
1776 unsigned char *pfa,*pfb,*pfc,*pba,*pbb,*pbc;
1792 for (i=0; i<16; i++)
1794 v = ((((unsigned int)(*pf++ + *pfa++ + *pfb++ + *pfc++ + 2)>>2) +
1795 ((unsigned int)(*pb++ + *pba++ + *pbb++ + *pbc++ + 2)>>2) + 1)>>1)
1817 * squared error between a (16*h) block and a bidirectional
1820 * p2: address of top left pel of block
1821 * pf,hxf,hyf: address and half pel flags of forward ref. block
1822 * pb,hxb,hyb: address and half pel flags of backward ref. block
1823 * h: height of block
1824 * lx: distance (in bytes) of vertically adjacent pels in p2,pf,pb
1826 static int bdist2(pf,pb,p2,lx,hxf,hyf,hxb,hyb,h)
1827 unsigned char *pf,*pb,*p2;
1828 int lx,hxf,hyf,hxb,hyb,h;
1830 register unsigned char *pfa,*pfb,*pfc,*pba,*pbb,*pbc;
1832 register int i, s, v;
1844 for(j = 0; j < h; j++)
1846 v = ((((unsigned int)(pf[0] + pfa[0] + pfb[0] + pfc[0] + 2) >> 2) +
1847 ((unsigned int)(pb[0] + pba[0] + pbb[0] + pbc[0] + 2) >> 2) + 1) >> 1) -
1850 v = ((((unsigned int)(pf[1] + pfa[1] + pfb[1] + pfc[1] + 2) >> 2) +
1851 ((unsigned int)(pb[1] + pba[1] + pbb[1] + pbc[1] + 2) >> 2) + 1) >> 1) -
1854 v = ((((unsigned int)(pf[2] + pfa[2] + pfb[2] + pfc[2] + 2) >> 2) +
1855 ((unsigned int)(pb[2] + pba[2] + pbb[2] + pbc[2] + 2) >> 2) + 1) >> 1) -
1858 v = ((((unsigned int)(pf[3] + pfa[3] + pfb[3] + pfc[3] + 2) >> 2) +
1859 ((unsigned int)(pb[3] + pba[3] + pbb[3] + pbc[3] + 2) >> 2) + 1) >> 1) -
1862 v = ((((unsigned int)(pf[4] + pfa[4] + pfb[4] + pfc[4] + 2) >> 2) +
1863 ((unsigned int)(pb[4] + pba[4] + pbb[4] + pbc[4] + 2) >> 2) + 1) >> 1) -
1866 v = ((((unsigned int)(pf[5] + pfa[5] + pfb[5] + pfc[5] + 2) >> 2) +
1867 ((unsigned int)(pb[5] + pba[5] + pbb[5] + pbc[5] + 2) >> 2) + 1) >> 1) -
1870 v = ((((unsigned int)(pf[6] + pfa[6] + pfb[6] + pfc[6] + 2) >> 2) +
1871 ((unsigned int)(pb[6] + pba[6] + pbb[6] + pbc[6] + 2) >> 2) + 1) >> 1) -
1874 v = ((((unsigned int)(pf[7] + pfa[7] + pfb[7] + pfc[7] + 2) >> 2) +
1875 ((unsigned int)(pb[7] + pba[7] + pbb[7] + pbc[7] + 2) >> 2) + 1) >> 1) -
1878 v = ((((unsigned int)(pf[8] + pfa[8] + pfb[8] + pfc[8] + 2) >> 2) +
1879 ((unsigned int)(pb[8] + pba[8] + pbb[8] + pbc[8] + 2) >> 2) + 1) >> 1) -
1882 v = ((((unsigned int)(pf[9] + pfa[9] + pfb[9] + pfc[9] + 2) >> 2) +
1883 ((unsigned int)(pb[9] + pba[9] + pbb[9] + pbc[9] + 2) >> 2) + 1) >> 1) -
1886 v = ((((unsigned int)(pf[10] + pfa[10] + pfb[10] + pfc[10] + 2) >> 2) +
1887 ((unsigned int)(pb[10] + pba[10] + pbb[10] + pbc[10] + 2) >> 2) + 1) >> 1) -
1890 v = ((((unsigned int)(pf[11] + pfa[11] + pfb[11] + pfc[11] + 2) >> 2) +
1891 ((unsigned int)(pb[11] + pba[11] + pbb[11] + pbc[11] + 2) >> 2) + 1) >> 1) -
1894 v = ((((unsigned int)(pf[12] + pfa[12] + pfb[12] + pfc[12] + 2) >> 2) +
1895 ((unsigned int)(pb[12] + pba[12] + pbb[12] + pbc[12] + 2) >> 2) + 1) >> 1) -
1898 v = ((((unsigned int)(pf[13] + pfa[13] + pfb[13] + pfc[13] + 2) >> 2) +
1899 ((unsigned int)(pb[13] + pba[13] + pbb[13] + pbc[13] + 2) >> 2) + 1) >> 1) -
1902 v = ((((unsigned int)(pf[14] + pfa[14] + pfb[14] + pfc[14] + 2) >> 2) +
1903 ((unsigned int)(pb[14] + pba[14] + pbb[14] + pbc[14] + 2) >> 2) + 1) >> 1) -
1906 v = ((((unsigned int)(pf[15] + pfa[15] + pfb[15] + pfc[15] + 2) >> 2) +
1907 ((unsigned int)(pb[15] + pba[15] + pbb[15] + pbc[15] + 2) >> 2) + 1) >> 1) -
1925 * variance of a (16*16) block, multiplied by 256
1926 * p: address of top left pel of block
1927 * lx: distance (in bytes) of vertically adjacent pels
1929 static int variance(p,lx)
1934 register unsigned int v, s, s2;
1938 for (j=0; j<16; j++)
1940 v = p[0]; s += v; s2 += v * v;
1941 v = p[1]; s += v; s2 += v * v;
1942 v = p[2]; s += v; s2 += v * v;
1943 v = p[3]; s += v; s2 += v * v;
1944 v = p[4]; s += v; s2 += v * v;
1945 v = p[5]; s += v; s2 += v * v;
1946 v = p[6]; s += v; s2 += v * v;
1947 v = p[7]; s += v; s2 += v * v;
1948 v = p[8]; s += v; s2 += v * v;
1949 v = p[9]; s += v; s2 += v * v;
1950 v = p[10]; s += v; s2 += v * v;
1951 v = p[11]; s += v; s2 += v * v;
1952 v = p[12]; s += v; s2 += v * v;
1953 v = p[13]; s += v; s2 += v * v;
1954 v = p[14]; s += v; s2 += v * v;
1955 v = p[15]; s += v; s2 += v * v;
1958 return s2 - (s*s)/256;