2 ;;; predcomp_00_mmxe.s:
4 ;;; Extended MMX prediction composition
5 ;;; routines handling the four different interpolation cases...
7 ;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10 ;;; This program is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU General Public License
12 ;;; as published by the Free Software Foundation; either version 2
13 ;;; of the License, or (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software
22 ;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28 ;;; The no-interpolation case (ix = 0, iy = 0)...
30 global predcomp_00_mmxe
32 ;;; void predcomp_<ix><iy>_mmxe(char *src,char *dst,int lx, int w, int h, int mask);
34 ;;; ix - Interpolation in x, iy - Interpolation in y
;;; Register roles, as established by the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx, edi = w, ecx = h
;;; NOTE(review): the [ebp+N] offsets assume ebp is set up as the frame
;;; pointer right after the push; that instruction is not visible in this
;;; excerpt - confirm.
43 ;;; mm1 = one's mask for src
44 ;;; mm0 = zero mask for src...
50 push ebp ; save caller's frame pointer
59 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
60 mov eax, [ebp+12] ; eax = pdst (2nd argument)
61 mov edx, [ebp+16] ; edx = lx, the amount added to a pointer to reach the next row
62 mov edi, [ebp+20] ; edi = w
63 mov ecx, [ebp+24] ; ecx = h, decremented once per row below
65 ;; Extend addflag into bit-mask (addflag is presumably the `mask` argument - TODO confirm)
72 jmp predrow00 ; jump to predrow00, presumably the row-loop entry (original note: align for speed)
75 movq mm4, [ebx] ; mm4 = first 8 bytes of the src row
88 movq mm4, [ebx+8] ; mm4 = second 8 bytes of the src row (offset 8)
99 add eax, edx ; dst += lx: advance dst pointer to the next row
102 sub ecx, 1 ; h -= 1: one row done, flags now reflect rows left
115 ;;; The x-axis interpolation case (ix = 1, iy = 0)...
117 global predcomp_10_mmxe
;;; Register roles, as established by the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx, edi = w, ecx = h
;;; NOTE(review): the [ebp+N] offsets assume ebp is set up as the frame
;;; pointer right after the push; that instruction is not visible in this
;;; excerpt - confirm.
122 push ebp ; save caller's frame pointer
131 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
132 mov eax, [ebp+12] ; eax = pdst (2nd argument)
133 mov edx, [ebp+16] ; edx = lx, the amount added to a pointer to reach the next row
134 mov edi, [ebp+20] ; edi = w
135 mov ecx, [ebp+24] ; ecx = h, decremented once per row below
137 ;; Extend addflag into bit-mask (addflag is presumably the `mask` argument - TODO confirm)
144 jmp predrow10 ; jump to predrow10, presumably the row-loop entry (original note: align for speed)
147 movq mm4, [ebx] ; mm4 = first 8 bytes of the src row: avg src in x
154 pavgb mm4, mm2 ; mm4 = rounded unsigned byte-wise average of mm4 and mm2 (mm2 is loaded outside the visible lines)
161 movq mm4, [ebx+8] ; mm4 = second 8 bytes of the src row: avg src in x
168 pavgb mm4, mm2 ; mm4 = rounded unsigned byte-wise average of mm4 and mm2 (mm2 is loaded outside the visible lines)
173 add eax, edx ; dst += lx: advance dst pointer to the next row
177 sub ecx, 1 ; h -= 1: one row done, flags now reflect rows left
190 ;;; The x-axis and y-axis interpolation case (ix = 1, iy = 1)...
192 global predcomp_11_mmxe
;;; Register roles, as established by the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx, edi = w, ecx = h
;;; Each 8-byte half of a row is built from the current src row and the row
;;; lx bytes further on; ebx is moved forward and back between the two.
;;; NOTE(review): the [ebp+N] offsets assume ebp is set up as the frame
;;; pointer right after the push; that instruction is not visible in this
;;; excerpt - confirm.
198 push ebp ; save caller's frame pointer
210 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
211 mov eax, [ebp+12] ; eax = pdst (2nd argument)
212 mov edx, [ebp+16] ; edx = lx, the amount added to a pointer to reach the next row
213 mov edi, [ebp+20] ; edi = w
214 mov ecx, [ebp+24] ; ecx = h, decremented once per row below
216 ;; Extend addflag into bit-mask (addflag is presumably the `mask` argument - TODO confirm)
224 jmp predrow11 ; jump to predrow11, presumably the row-loop entry (original note: align for speed)
227 movq mm4, [ebx] ; first 8 bytes of the current row; mm4 and mm6 accumulate partial sums for interp.
239 add ebx, edx ; src += lx: move src pointer to the next row
241 movq mm5, [ebx] ; mm5 = first 8 bytes of the next (2nd) row: avg src in x
243 punpcklbw mm5, mm2 ; interleave low 4 bytes of mm5 with low 4 bytes of mm2 (widens bytes to words if mm2 is zero - TODO confirm mm2); accumulate partial interpolation
255 ;; Now round and repack...
273 sub ebx, edx ; src -= lx: back to the 1st (current) row
275 movq mm4, [ebx+8] ; second 8 bytes of the current row; mm4 and mm6 accumulate partial sums for interp.
287 add ebx, edx ; src += lx: move to the next row again; not undone in the visible lines, so ebx is left on the next row
289 movq mm5, [ebx+8] ; mm5 = second 8 bytes of the next (2nd) row: avg src in x
291 punpcklbw mm5, mm2 ; interleave low 4 bytes of mm5 with low 4 bytes of mm2 (widens bytes to words if mm2 is zero - TODO confirm mm2); accumulate partial interpolation
303 ;; Now round and repack...
319 add eax, edx ; dst += lx: advance dst pointer to the next row
322 sub ecx, 1 ; h -= 1: one row done, flags now reflect rows left
336 ;;; The y-axis interpolation case (ix = 0, iy = 1)...
338 global predcomp_01_mmxe
;;; Register roles, as established by the argument loads below:
;;;   ebx = src pointer, eax = dst pointer, edx = lx, edi = w, ecx = h
;;; Each 8-byte half of a row is averaged with the same bytes of the row
;;; lx bytes further on.
;;; NOTE(review): the [ebp+N] offsets assume ebp is set up as the frame
;;; pointer right after the push; that instruction is not visible in this
;;; excerpt - confirm.
342 push ebp ; save caller's frame pointer
351 mov ebx, [ebp+8] ; ebx = psrc (1st argument)
352 mov eax, [ebp+12] ; eax = pdst (2nd argument)
353 mov edx, [ebp+16] ; edx = lx, the amount added to a pointer to reach the next row
354 mov edi, [ebp+20] ; edi = w
355 mov ecx, [ebp+24] ; ecx = h, decremented once per row below
357 ;; Extend addflag into bit-mask (addflag is presumably the `mask` argument - TODO confirm)
364 jmp predrow01 ; jump to predrow01, presumably the row-loop entry (original note: align for speed)
367 movq mm4, [ebx] ; mm4 = first 8 bytes of the current src row
368 add ebx, edx ; src += lx: move src pointer to the next row
369 pavgb mm4, [ebx] ; average in y: rounded unsigned byte-wise average with the first 8 bytes of the next row
382 sub ebx, edx ; src -= lx: back to the previous (current) row
383 movq mm4, [ebx+8] ; mm4 = second 8 bytes of the current src row
384 add ebx, edx ; src += lx: move to the next row again; not undone in the visible lines, so ebx is left on the next row
385 pavgb mm4, [ebx+8] ; average in y: rounded unsigned byte-wise average with the second 8 bytes of the next row
396 add eax, edx ; dst += lx: advance dst pointer to the next row
399 sub ecx, 1 ; h -= 1: one row done, flags now reflect rows left