4 ;;; Extended MMX prediction composition
5 ;;; routines handling the four different interpolation cases...
7 ;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10 ;;; This program is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU General Public License
12 ;;; as published by the Free Software Foundation; either version 2
13 ;;; of the License, or (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software
22 ;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28 ;;; The no interpolation case: predcomp_00_mmx (ix = 0, iy = 0).
;;; Arguments are read from [ebp+8]..[ebp+28] in prototype order:
;;; src, dst, lx, w, h, addflag.
;;; NOTE(review): this listing is not contiguous (see the embedded line
;;; numbers). The entry label, the frame setup, the loop labels, the
;;; stores and the return are not visible here.
30 global predcomp_00_mmx
32 ;;; void predcomp_<ix><iy>_mmx(char *src,char *dst,int lx, int w, int h, int addflag);
34 ;;; ix - Interpolation in x iy - Interpolation in y
43 ;;; mm1 = one's mask for src
44 ;;; mm0 = zero mask for src...
51 push ebp ; save the frame pointer of the caller
65 mov ebx, [ebp+8] ; ebx = psrc (argument 1)
66 mov eax, [ebp+12] ; eax = pdst (argument 2)
67 mov edx, [ebp+16] ; edx = lx (argument 3), added to the row pointer below
68 mov edi, [ebp+20] ; edi = w (argument 4)
69 mov ecx, [ebp+24] ; ecx = h (argument 5), counted down below
70 mov esi, [ebp+28] ; esi = addflag (argument 6)
71 ;; Extend addflag into bit-mask
73 jmp predrow00m ; enter the row loop; NOTE(review): "align for speed" presumably refers to alignment padding being skipped here - confirm
76 movq mm4, [ebx] ; first 8 bytes of row (psrc+0 .. psrc+7)
101 movq mm4, [ebx+8] ; second 8 bytes of row (psrc+8 .. psrc+15)
124 add eax, edx ; pdst += lx: advance dst pointer to next row
127 sub ecx, 1 ; one fewer row left (ecx = h); presumably tested by the loop branch that follows
141 ;;; The x-axis interpolation case: predcomp_10_mmx.
;;; Arguments are read from [ebp+8]..[ebp+28] in the order
;;; src, dst, lx, w, h, addflag.
;;; NOTE(review): this listing is not contiguous (see the embedded line
;;; numbers). The entry label, the frame setup, the unpack/shift steps,
;;; the stores and the loop branch are not visible here.
143 global predcomp_10_mmx
148 push ebp ; save the frame pointer of the caller
162 mov ebx, [ebp+8] ; ebx = psrc (argument 1)
163 mov eax, [ebp+12] ; eax = pdst (argument 2)
164 mov edx, [ebp+16] ; edx = lx (argument 3), added to the row pointer below
165 mov edi, [ebp+20] ; edi = w (argument 4)
166 mov ecx, [ebp+24] ; ecx = h (argument 5), counted down below
167 mov esi, [ebp+28] ; esi = addflag (argument 6)
168 ;; Extend addflag into bit-mask
170 jmp predrow10m ; enter the row loop; NOTE(review): "align for speed" presumably refers to alignment padding being skipped here - confirm
173 movq mm4, [ebx] ; first 8 bytes of row (psrc+0 .. psrc+7)
182 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
192 movq mm2, [eax] ; load dst bytes 0..7 (pdst+0); presumably the existing prediction to be combined - confirm
196 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
209 movq mm4, [ebx+8] ; second 8 bytes of row (psrc+8 .. psrc+15)
218 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
228 movq mm2, [eax+8] ; load dst bytes 8..15 (pdst+8); presumably the existing prediction to be combined - confirm
232 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
244 add eax, edx ; pdst += lx: advance dst pointer to next row
247 sub ecx, 1 ; one fewer row left (ecx = h); presumably tested by the loop branch that follows
260 ;;; The y-axis interpolation case: predcomp_01_mmx.
;;; Arguments are read from [ebp+8]..[ebp+28] in the order
;;; src, dst, lx, w, h, addflag.
;;; The source pointer is stepped forward by lx and back again so that
;;; both 8-byte halves are read from the current row and the row after it.
;;; NOTE(review): this listing is not contiguous (see the embedded line
;;; numbers). The entry label, the frame setup, the loads of the second
;;; row, the stores and the loop branch are not visible here.
262 global predcomp_01_mmx
267 push ebp ; save the frame pointer of the caller
281 mov ebx, [ebp+8] ; ebx = psrc (argument 1)
282 mov eax, [ebp+12] ; eax = pdst (argument 2)
283 mov edx, [ebp+16] ; edx = lx (argument 3), added to / subtracted from psrc below
284 mov edi, [ebp+20] ; edi = w (argument 4)
285 mov ecx, [ebp+24] ; ecx = h (argument 5), counted down below
286 mov esi, [ebp+28] ; esi = addflag (argument 6)
288 jmp predrow01m ; enter the row loop; NOTE(review): "align for speed" presumably refers to alignment padding being skipped here - confirm
292 movq mm4, [ebx] ; first 8 bytes of current row (psrc+0 .. psrc+7)
294 add ebx, edx ; psrc += lx: step to the next row
303 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
313 movq mm2, [eax] ; load dst bytes 0..7 (pdst+0); presumably the existing prediction to be combined - confirm
317 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
330 sub ebx, edx ; psrc -= lx: back to the first row for the second 8-byte half
331 movq mm4, [ebx+8] ; second 8 bytes of current row (psrc+8 .. psrc+15)
333 add ebx, edx ; psrc += lx: step to the next row
341 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
351 movq mm2, [eax+8] ; load dst bytes 8..15 (pdst+8); presumably the existing prediction to be combined - confirm
355 paddw mm4, mm2 ; mm4 += mm2 (packed 16-bit words): summing step of averaging mm4/mm5 with mm2/mm3
369 sub ecx, 1 ; one fewer row left (ecx = h); presumably tested by the loop branch that follows
383 ;;; The x-axis and y-axis interpolation case: predcomp_11_mmx.
;;; Arguments are read from [ebp+8]..[ebp+28] in the order
;;; src, dst, lx, w, h, addflag.
;;; NOTE(review): this listing is not contiguous (see the embedded line
;;; numbers) and the routine continues past the last line shown. The
;;; entry label, the frame setup, most of the arithmetic, the stores and
;;; the loop branch are not visible here.
385 global predcomp_11_mmx
392 push ebp ; save the frame pointer of the caller
410 mov ebx, [ebp+8] ; ebx = psrc (argument 1)
411 mov eax, [ebp+12] ; eax = pdst (argument 2)
412 mov edx, [ebp+16] ; edx = lx (argument 3), added to / subtracted from the row pointers below
413 mov edi, [ebp+20] ; edi = w (argument 4)
414 mov ecx, [ebp+24] ; ecx = h (argument 5), counted down below
415 mov esi, [ebp+28] ; esi = addflag (argument 6)
416 ;; Extend addflag into bit-mask
419 jmp predrow11 ; enter the row loop; NOTE(review): "align for speed" presumably refers to alignment padding being skipped here - confirm
422 movq mm4, [ebx] ; first 8 bytes of current row; mm4 and mm6 accumulate partial sums for interp.
434 add ebx, edx ; psrc += lx: step to the next row
436 movq mm5, [ebx] ; first 8 bytes of the next row (ebx was advanced by lx above): avg src in x
438 punpcklbw mm5, mm0 ; interleave low 4 bytes of mm5 with mm0 (presumably zero) into 4 words for accumulation
459 movq mm5, [eax] ; load dst bytes 0..7 (pdst+0); presumably the existing prediction to be combined - confirm
463 paddw mm4, mm5 ; mm4 += mm5 (packed 16-bit words): summing step of averaging mm4/mm6 with mm5/mm7
477 sub ebx, edx ; psrc -= lx: back to the first row for the second 8-byte half
479 movq mm4, [ebx+8] ; second 8 bytes of current row; mm4 and mm6 accumulate partial sums for interp.
491 add ebx, edx ; psrc += lx: step to the next row
493 movq mm5, [ebx+8] ; second 8 bytes of the next row (ebx was advanced by lx above): avg src in x
495 punpcklbw mm5, mm0 ; interleave low 4 bytes of mm5 with mm0 (presumably zero) into 4 words for accumulation
516 movq mm5, [eax+8] ; load dst bytes 8..15 (pdst+8) to add and average
520 paddw mm4, mm5 ; mm4 += mm5 (packed 16-bit words): summing step of averaging mm4/mm6 with mm5/mm7
531 add eax, edx ; pdst += lx: advance dst pointer to next row
534 sub ecx, 1 ; one fewer row left (ecx = h); presumably tested by the loop branch that follows