/*
 *
 * mblock_sub44_sads_x86_h.c
 * Copyright (C) 2000 Andrew Stevens
 *
 * Fast block sum-absolute difference computation for a rectangular area 4*x
 * by y where y > h against a 4 by h block.
 *
 * Used for 4*4 sub-sampled motion compensation calculations.
 *
 *
 * This file is part of mpeg2enc, a free MPEG-2 video stream encoder
 * based on the original MSSG reference design
 *
 * mpeg2enc is free software; you can redistribute new parts
 * and/or modify under the terms of the GNU General Public License
 * as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2enc is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * See the files for those sections (c) MSSG
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *
 * Generates a vector of sad's for 4*4 sub-sampled pel (qpel) data (with
 * co-ordinates and top-left qpel address) from a specified rectangle
 * against a specified 16*h pel (4*4 qpel) reference block. The
 * generated vector contains results only for those sad's that do not
 * exceed the running threshold, which is lowered to four times each
 * accepted sad (the code uses weight<<2), and are aligned on 8-pel
 * boundaries.
 *
 * Invariant: blk points to top-left sub-sampled pel for macroblock
 * at (ilow,jlow)
 * i{low,high} and j{low,high} must be multiples of 4.
 *
 * sad = Sum Absolute Differences
 *
 * NOTES: for best efficiency i{low,high} should be multiples of 16.
*/

/*
 * Scan a grid of candidate positions and collect every candidate whose
 * 4*4 sub-sampled SAD against `ref` does not exceed the running threshold.
 *
 * Scan order: y runs from jlow to jhigh and x from ilow to ihigh (both
 * inclusive) in steps of 4.  `blk` is the data address used for the first
 * candidate (x == ilow, y == jlow); the address advances by one byte per
 * x step and by `rowstride` bytes per y step.
 *
 * Parameters:
 *   blk        start address of the search data, matching (ilow, jlow)
 *   ref        reference block handed to qblock_sad
 *   ilow/ihigh inclusive x range of the scan
 *   jlow/jhigh inclusive y range of the scan
 *   h          block height, passed through to load_blk and qblock_sad
 *   rowstride  byte distance between rows, also the per-y-step advance
 *   threshold  initial acceptance limit for a candidate's SAD
 *   resvec     output array; no capacity check is made here, so the
 *              caller must supply enough room for every candidate
 *
 * Returns the number of entries written to resvec.
 *
 * NOTE(review): qblock_sad receives only ref, h and rowstride, so the
 * candidate data presumably reaches it through state prepared by
 * load_blk() and advanced by shift_blk() - confirm against their
 * definitions.  The order load_blk -> qblock_sad -> shift_blk inside the
 * inner loop is therefore kept exactly as is.
 */
int SIMD_SUFFIX(mblock_sub44_dists)( uint8_t *blk, uint8_t *ref,
                                     int ilow, int jlow,
                                     int ihigh, int jhigh,
                                     int h, int rowstride,
                                     int threshold,
                                     mc_result_s *resvec)
{
	/* x positions sharing ilow's low four bits trigger a fresh load */
	const int reload_phase = ilow & 15;
	uint8_t *row_start = blk;
	mc_result_s *out = resvec;
	int32_t x, y;

	for( y = jlow; y <= jhigh; y += 4 )
	{
		uint8_t *scan = row_start;

		for( x = ilow; x <= ihigh; x += 4 )
		{
			int sad;

			/* Every fourth step (x advances by 4, phase repeats
			   every 16) the working data is reloaded from memory. */
			if( (x & 15) == reload_phase )
			{
				load_blk( scan, rowstride, h );
			}

			sad = SIMD_SUFFIX(qblock_sad)( ref, h, rowstride );

			if( sad <= threshold )
			{
				/* Tighten the limit: never above four times an
				   accepted SAD. */
				threshold = intmin( sad<<2, threshold );

				/* Rough and-ready absolute distance penalty */
				/* NOTE: This penalty is *vital* to correct
				   operation as otherwise the sub-mean filtering
				   won't work on very uniform images. */
				out->weight = (uint16_t)sad+((intabs(x)+intabs(y))>>3);
				out->x = (uint8_t)x;
				out->y = (uint8_t)y;
				++out;
			}

			/* Step to the next candidate: one byte further in
			   memory, and shift the loaded data by 8 bits. */
			scan += 1;
			shift_blk(8);
		}

		row_start += rowstride;
	}

	/* Called once after all SIMD work; presumably resets MMX state so
	   later floating-point code is safe - confirm against its definition. */
	emms();

	return (int)(out - resvec);
}

#undef concat