+++ /dev/null
-ADD_1: dd 01010101h, 01010101h
-MASK_AND: dd 7f7f7f7fh, 7f7f7f7fh
-PLUS_384: dd 01800180h, 01800180h
-PLUS_128: dd 00800080h, 00800080h
-
-%assign LocalFrameSize 0
-%assign RegisterStorageSize 16
-
-; Arguments:
-%assign source LocalFrameSize + RegisterStorageSize + 4
-%assign dest LocalFrameSize + RegisterStorageSize + 8
-%assign lx2 LocalFrameSize + RegisterStorageSize + 12
-%assign h LocalFrameSize + RegisterStorageSize + 16
-
-; Locals (on local stack frame)
-
-
-; extern void C rec_mmx (
-; unsigned char *source,
-; unsigned char *dest,
-; int lx2,
-; int h
-;
-; The local variables are on the stack,
-;
-
-global recva_mmx
-global recvac_mmx
-global rech_mmx
-global rechc_mmx
-global add_block_mmx
-global set_block_mmx
-
-
- align 16
-rech_mmx:
- push esi
- push edi
- push ecx
- push ebx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
-.rech1:
- movq mm0,[esi]
- movq mm1,[esi+1]
- movq mm2,[esi+8]
- movq mm3,[esi+9]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm2,1
- psrlw mm3,1
- pand mm0,mm5
- pand mm1,mm5
- pand mm2,mm5
- pand mm3,mm5
- paddusb mm0,mm1
- paddusb mm2,mm3
- paddusb mm0,mm6
- paddusb mm2,mm6
- movq [edi],mm0
- add esi,ebx
- movq [edi+8],mm2
- add edi,ebx
- dec ecx
- jnz .rech1
- emms
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
- align 16
-rechc_mmx:
- push esi
- push edi
- push ecx
- push ebx
-; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
-.rechc1:
- movq mm0,[esi]
- movq mm1,[esi+1]
- psrlw mm0,1
- psrlw mm1,1
- pand mm0,mm5
- pand mm1,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- movq [edi],mm0
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .rechc1
- emms
-; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
-
-
-%assign RegisterStorageSize 20
-%assign source LocalFrameSize + RegisterStorageSize + 4
-%assign dest LocalFrameSize + RegisterStorageSize + 8
-%assign lx LocalFrameSize + RegisterStorageSize + 12
-%assign lx2 LocalFrameSize + RegisterStorageSize + 16
-%assign h LocalFrameSize + RegisterStorageSize + 20
-
- align 16
-recva_mmx:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- mov edx, [esp+lx]
- movq mm7, [MASK_AND]
- movq mm6, [ADD_1]
-.recva1:
- movq mm0,[esi]
- movq mm1,[esi+edx]
- movq mm2,[esi+8]
- movq mm3,[esi+edx+8]
- movq mm4,[edi]
- movq mm5,[edi+8]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm2,1
- psrlw mm3,1
- psrlw mm4,1
- psrlw mm5,1
- pand mm0,mm7
- pand mm1,mm7
- pand mm2,mm7
- pand mm3,mm7
- pand mm4,mm7
- pand mm5,mm7
- paddusb mm0,mm1
- paddusb mm2,mm3
- paddusb mm0,mm6
- paddusb mm2,mm6
- psrlw mm0,1
- psrlw mm2,1
- pand mm0,mm7
- pand mm2,mm7
- paddusb mm4,mm0
- paddusb mm5,mm2
- paddusb mm4,mm6
- paddusb mm5,mm6
- movq [edi],mm4
- movq [edi+8],mm5
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz near .recva1
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
- align 16
-recvac_mmx:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- mov edx, [esp+lx]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
-.recvac1:
- movq mm0,[esi]
- movq mm1,[esi+edx]
- movq mm4,[edi]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm4,1
- pand mm0,mm5
- pand mm1,mm5
- pand mm4,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- psrlw mm0,1
- pand mm0,mm5
- paddusb mm4,mm0
- paddusb mm4,mm6
- movq [edi],mm4
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .recvac1
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
-%assign RegisterStorageSize 20
-%assign rfp LocalFrameSize + RegisterStorageSize + 4
-%assign bp LocalFrameSize + RegisterStorageSize + 8
-%assign iincr LocalFrameSize + RegisterStorageSize + 12
-
-; FIXME clipping needs to be done
-
- align 16
-add_block_mmx:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+bp]
- mov edi, [esp+rfp]
- mov ebx, [esp+iincr]
-; movq mm7, [PLUS_384]
- mov ecx,8
- pxor mm2,mm2 ; clear
-%rep 8
- movq mm0, [edi] ; get dest
- movq mm1,mm0
- punpcklbw mm0,mm2
- punpckhbw mm1,mm2
- paddsw mm0, [esi]
- paddsw mm1, [esi+8]
-; paddsw mm0, mm7
-; paddsw mm1, mm7
- packuswb mm0,mm1
- movq [edi], mm0
- add edi,ebx
- add esi,16
-%endrep
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
- align 16
-set_block_mmx:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+bp]
- mov edi, [esp+rfp]
- mov ebx, [esp+iincr]
- movq mm7, [PLUS_128]
-%rep 4
- movq mm0, [esi]
- movq mm1, [esi+8]
- paddsw mm0, mm7
- movq mm2, [esi+16]
- paddsw mm1, mm7
- movq mm3, [esi+24]
- paddsw mm2, mm7
- packuswb mm0, mm1
- paddsw mm3, mm7
- movq [edi], mm0
- packuswb mm2, mm3
- add edi, ebx
- add esi, 32
- movq [edi], mm2
- add edi, ebx
-%endrep
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
-
-