2 ;;; predcomp_00_mmxe.s:
4 ;;; Extended MMX prediction composition
5 ;;; routines handling the four different interpolation cases...
7 ;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10 ;;; This program is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU General Public License
12 ;;; as published by the Free Software Foundation; either version 2
13 ;;; of the License, or (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software
22 ;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28 ;;; The no interpolation case...
30 global predcomp_00_mmxe
32 ;;; void predcomp_<ix><iy>_mmxe(char *src,char *dst,int lx, int w, int h, int mask);
34 ;;; ix - Interpolation in x iy - Interpolation in y
;;; No-interpolation variant (ix = 0, iy = 0).
;;; Register roles in the visible code: ebx = psrc, eax = pdst,
;;; edx = lx (row stride), edi = w, ecx = h (rows remaining).
;;; Each row is read as two 8-byte halves ([ebx] and [ebx+8]).
;;; NOTE(review): ebx and edi are overwritten below; their save/restore is
;;; not visible in this excerpt -- confirm they are preserved for the caller.
43 ;;; mm1 = one's mask for src
44 ;;; mm0 = zero mask for src...
49 push ebp ; save caller's frame pointer
58 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
59 mov eax, [ebp+12] ; eax = pdst (2nd argument)
60 mov edx, [ebp+16] ; edx = lx (3rd argument), added to the row pointer per row
61 mov edi, [ebp+20] ; edi = w (4th argument); not referenced in the visible lines
62 mov ecx, [ebp+24] ; ecx = h (5th argument), decremented once per row
64 ;; Extend addflag into bit-mask (presumably the `mask` argument; that code is not visible here)
71 jmp predrow00 ; enter the row loop (original note: "align for speed" -- presumably skips alignment padding; confirm)
74 movq mm4, [ebx] ; mm4 = first 8 bytes of the source row
87 movq mm4, [ebx+8] ; mm4 = second 8 bytes of the source row (offset 8)
98 add eax, edx ; pdst += lx: advance destination pointer to the next row
101 sub ecx, 1 ; one fewer row left (ecx started at h)
114 ;;; The x-axis interpolation case...
116 global predcomp_10_mmxe
;;; x-axis interpolation variant (ix = 1, iy = 0).
;;; Register roles in the visible code: ebx = psrc, eax = pdst,
;;; edx = lx (row stride), edi = w, ecx = h (rows remaining).
;;; NOTE(review): ebx and edi are overwritten below; their save/restore is
;;; not visible in this excerpt -- confirm they are preserved for the caller.
121 push ebp ; save caller's frame pointer
130 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
131 mov eax, [ebp+12] ; eax = pdst (2nd argument)
132 mov edx, [ebp+16] ; edx = lx (3rd argument), added to the row pointer per row
133 mov edi, [ebp+20] ; edi = w (4th argument); not referenced in the visible lines
134 mov ecx, [ebp+24] ; ecx = h (5th argument), decremented once per row
136 ;; Extend addflag into bit-mask (that code is not visible in this excerpt)
143 jmp predrow10 ; enter the row loop (original note: "align for speed" -- presumably skips alignment padding; confirm)
146 movq mm4, [ebx] ; first 8 bytes row: avg src in x
153 pavgb mm4, mm2 ; combine: mm4 = per-byte unsigned rounded average of mm4 and mm2 (mm2 is set in lines not shown)
160 movq mm4, [ebx+8] ; 2nd 8 bytes row: avg src in x
167 pavgb mm4, mm2 ; combine: same rounded byte average for the second half
172 add eax, edx ; pdst += lx: advance destination pointer to the next row
176 sub ecx, 1 ; one fewer row left (ecx started at h)
189 ;;; The x-axis and y-axis interpolation case...
191 global predcomp_11_mmxe
;;; x-axis and y-axis interpolation variant (ix = 1, iy = 1).
;;; Register roles in the visible code: ebx = psrc, eax = pdst,
;;; edx = lx (row stride), edi = w, ecx = h (rows remaining).
;;; ebx is moved one row forward and back (add/sub edx) so that both the
;;; current row and the row below it can be read for each 8-byte half.
;;; NOTE(review): ebx and edi are overwritten below; their save/restore is
;;; not visible in this excerpt -- confirm they are preserved for the caller.
197 push ebp ; save caller's frame pointer
209 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
210 mov eax, [ebp+12] ; eax = pdst (2nd argument)
211 mov edx, [ebp+16] ; edx = lx (3rd argument), the distance between rows
212 mov edi, [ebp+20] ; edi = w (4th argument); not referenced in the visible lines
213 mov ecx, [ebp+24] ; ecx = h (5th argument), decremented once per row
215 ;; Extend addflag into bit-mask (that code is not visible in this excerpt)
223 jmp predrow11 ; enter the row loop (original note: "align for speed" -- presumably skips alignment padding; confirm)
226 movq mm4, [ebx] ; first 8 bytes of 1st row; mm4 and mm6 accumulate partial sums for interp.
238 add ebx, edx ; psrc += lx: step to the 2nd (next) row
240 movq mm5, [ebx] ; first 8 bytes of 2nd row (ebx was just advanced by lx)
242 punpcklbw mm5, mm2 ; interleave low 4 bytes of mm5 with mm2 (presumably zero, widening bytes to words -- confirm) for accumulation
254 ;; Now round and repack...
272 sub ebx, edx ; Back to 1st row
274 movq mm4, [ebx+8] ; second 8 bytes of 1st row; mm4 and mm6 accumulate partial sums for interp.
286 add ebx, edx ; psrc += lx: step to the 2nd (next) row again
288 movq mm5, [ebx+8] ; second 8 bytes of 2nd row (offset 8, ebx already advanced by lx)
290 punpcklbw mm5, mm2 ; interleave low 4 bytes of mm5 with mm2 (presumably zero, widening bytes to words -- confirm) for accumulation
302 ;; Now round and repack...
318 add eax, edx ; pdst += lx: advance destination pointer to the next row
321 sub ecx, 1 ; one fewer row left (ecx started at h)
335 ;;; The y-axis interpolation case...
337 global predcomp_01_mmxe
;;; y-axis interpolation variant (ix = 0, iy = 1).
;;; Register roles in the visible code: ebx = psrc, eax = pdst,
;;; edx = lx (row stride), edi = w, ecx = h (rows remaining).
;;; Each 8-byte half of a row is averaged with the same half of the row
;;; lx bytes further on, using pavgb (per-byte unsigned rounded average).
;;; NOTE(review): ebx and edi are overwritten below; their save/restore is
;;; not visible in this excerpt -- confirm they are preserved for the caller.
341 push ebp ; save caller's frame pointer
350 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
351 mov eax, [ebp+12] ; eax = pdst (2nd argument)
352 mov edx, [ebp+16] ; edx = lx (3rd argument), the distance between rows
353 mov edi, [ebp+20] ; edi = w (4th argument); not referenced in the visible lines
354 mov ecx, [ebp+24] ; ecx = h (5th argument), decremented once per row
356 ;; Extend addflag into bit-mask (that code is not visible in this excerpt)
363 jmp predrow01 ; enter the row loop (original note: "align for speed" -- presumably skips alignment padding; confirm)
366 movq mm4, [ebx] ; mm4 = first 8 bytes of the current row
367 add ebx, edx ; psrc += lx: step to the next row
368 pavgb mm4, [ebx] ; Average in y: rounded byte average with first 8 bytes of the next row
381 sub ebx, edx ; Back to prev row
382 movq mm4, [ebx+8] ; mm4 = second 8 bytes of the current row (offset 8)
383 add ebx, edx ; psrc += lx: step to the next row again
384 pavgb mm4, [ebx+8] ; Average in y: rounded byte average with second 8 bytes of the next row
395 add eax, edx ; pdst += lx: advance destination pointer to the next row
398 sub ecx, 1 ; one fewer row left (ecx started at h)