X-Git-Url: https://git.cinelerra-gg.org/git/?a=blobdiff_plain;f=cinelerra-5.1%2Flibzmpeg3%2Fvideo%2Ftst1.c;fp=cinelerra-5.1%2Flibzmpeg3%2Fvideo%2Ftst1.c;h=6121fef863938b61eb9aa32fcedc6aa59b48ffbf;hb=30bdb85eb33a8ee7ba675038a86c6be59c43d7bd;hp=0000000000000000000000000000000000000000;hpb=52fcc46226f9df46f9ce9d0566dc568455a7db0b;p=goodguy%2Fhistory.git diff --git a/cinelerra-5.1/libzmpeg3/video/tst1.c b/cinelerra-5.1/libzmpeg3/video/tst1.c new file mode 100644 index 00000000..6121fef8 --- /dev/null +++ b/cinelerra-5.1/libzmpeg3/video/tst1.c @@ -0,0 +1,1092 @@ +#include +#include +#include + +#define USE_MMX +#define MMX_ACCURATE + +#ifdef USE_MMX +#include "mmx.h" +#define m_(v) (*(mmx_t*)(((long long *)(v)))) +#ifdef MMX_ACCURATE +static uint32_t sadd1[2] = { 0x00010001, 0x00010001 }; +static uint32_t sadd2[2] = { 0x00020002, 0x00020002 }; +#else +static uint32_t bmask[2] = { 0x7f7f7f7f, 0x7f7f7f7f }; +static uint32_t badd1[2] = { 0x01010101, 0x01010101 }; +#endif +#endif + +static inline void reca(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + int j; for( j=0; j> 1; + dp[1] = (uint32_t)(dp[1] + sp[1] + 1) >> 1; + dp[2] = (uint32_t)(dp[2] + sp[2] + 1) >> 1; + dp[3] = (uint32_t)(dp[3] + sp[3] + 1) >> 1; + dp[4] = (uint32_t)(dp[4] + sp[4] + 1) >> 1; + dp[5] = (uint32_t)(dp[5] + sp[5] + 1) >> 1; + dp[6] = (uint32_t)(dp[6] + sp[6] + 1) >> 1; + dp[7] = (uint32_t)(dp[7] + sp[7] + 1) >> 1; + dp[8] = (uint32_t)(dp[8] + sp[8] + 1) >> 1; + dp[9] = (uint32_t)(dp[9] + sp[9] + 1) >> 1; + dp[10] = (uint32_t)(dp[10] + sp[10] + 1) >> 1; + dp[11] = (uint32_t)(dp[11] + sp[11] + 1) >> 1; + dp[12] = (uint32_t)(dp[12] + sp[12] + 1) >> 1; + dp[13] = (uint32_t)(dp[13] + sp[13] + 1) >> 1; + dp[14] = (uint32_t)(dp[14] + sp[14] + 1) >> 1; + dp[15] = (uint32_t)(dp[15] + sp[15] + 1) >> 1; + sp += lx2; dp += lx2; + } +} + +static inline void mreca(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1; + dp[1] = (uint32_t)(dp[1] + sp[1] + 1)>>1; + dp[2] = (uint32_t)(dp[2] + sp[2] + 1)>>1; + dp[3] = (uint32_t)(dp[3] + sp[3] + 1)>>1; + dp[4] = (uint32_t)(dp[4] + sp[4] + 1)>>1; + dp[5] = (uint32_t)(dp[5] + sp[5] + 1)>>1; + dp[6] = (uint32_t)(dp[6] + sp[6] + 1)>>1; + dp[7] = (uint32_t)(dp[7] + sp[7] + 1)>>1; + sp += lx2; dp += lx2; + } +} + +static inline void mrecac(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j> 1; + dp[1] = (uint32_t)(sp[1] + sp2[1] + 1) >> 1; + dp[2] = (uint32_t)(sp[2] + sp2[2] + 1) >> 1; + dp[3] = (uint32_t)(sp[3] + sp2[3] + 1) >> 1; + dp[4] = (uint32_t)(sp[4] + sp2[4] + 1) >> 1; + dp[5] = (uint32_t)(sp[5] + sp2[5] + 1) >> 1; + dp[6] = (uint32_t)(sp[6] + sp2[6] + 1) >> 1; + dp[7] = (uint32_t)(sp[7] + sp2[7] + 1) >> 1; + dp[8] = (uint32_t)(sp[8] + sp2[8] + 1) >> 1; + dp[9] = (uint32_t)(sp[9] + sp2[9] + 1) >> 1; + dp[10] = (uint32_t)(sp[10] + sp2[10] + 1) >> 1; + dp[11] = (uint32_t)(sp[11] + sp2[11] + 1) >> 1; + dp[12] = (uint32_t)(sp[12] + sp2[12] + 1) >> 1; + dp[13] = (uint32_t)(sp[13] + sp2[13] + 1) >> 1; + dp[14] = (uint32_t)(sp[14] + sp2[14] + 1) >> 1; + dp[15] = (uint32_t)(sp[15] + sp2[15] + 1) >> 1; + sp += lx2; sp2 += lx2 ; dp += lx2; + } +} + +static inline void mrecv(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1; + dp[1] = (uint32_t)(sp[1]+sp2[1]+1)>>1; + dp[2] = (uint32_t)(sp[2]+sp2[2]+1)>>1; + dp[3] = (uint32_t)(sp[3]+sp2[3]+1)>>1; + dp[4] = (uint32_t)(sp[4]+sp2[4]+1)>>1; + dp[5] = (uint32_t)(sp[5]+sp2[5]+1)>>1; + dp[6] = (uint32_t)(sp[6]+sp2[6]+1)>>1; + dp[7] = (uint32_t)(sp[7]+sp2[7]+1)>>1; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrecvc(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1) + 1)>>1; + dp[1] = (dp[1] + ((uint32_t)(sp[1]+sp2[1]+1)>>1) + 1)>>1; + dp[2] = (dp[2] + ((uint32_t)(sp[2]+sp2[2]+1)>>1) + 1)>>1; + dp[3] = (dp[3] + ((uint32_t)(sp[3]+sp2[3]+1)>>1) + 1)>>1; + dp[4] = (dp[4] + ((uint32_t)(sp[4]+sp2[4]+1)>>1) + 1)>>1; + dp[5] = (dp[5] + ((uint32_t)(sp[5]+sp2[5]+1)>>1) + 1)>>1; + dp[6] = (dp[6] + ((uint32_t)(sp[6]+sp2[6]+1)>>1) + 1)>>1; + dp[7] = (dp[7] + ((uint32_t)(sp[7]+sp2[7]+1)>>1) + 1)>>1; + dp[8] = (dp[8] + ((uint32_t)(sp[8]+sp2[8]+1)>>1) + 1)>>1; + dp[9] = (dp[9] + ((uint32_t)(sp[9]+sp2[9]+1)>>1) + 1)>>1; + dp[10] = (dp[10] + ((uint32_t)(sp[10]+sp2[10]+1)>>1) + 1)>>1; + dp[11] = (dp[11] + ((uint32_t)(sp[11]+sp2[11]+1)>>1) + 1)>>1; + dp[12] = (dp[12] + ((uint32_t)(sp[12]+sp2[12]+1)>>1) + 1)>>1; + dp[13] = (dp[13] + ((uint32_t)(sp[13]+sp2[13]+1)>>1) + 1)>>1; + dp[14] = (dp[14] + ((uint32_t)(sp[14]+sp2[14]+1)>>1) + 1)>>1; + dp[15] = (dp[15] + ((uint32_t)(sp[15]+sp2[15]+1)>>1) + 1)>>1; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrecva(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1) + 1)>>1; + dp[1] = (dp[1] + ((uint32_t)(sp[1]+sp2[1]+1)>>1) + 1)>>1; + dp[2] = (dp[2] + ((uint32_t)(sp[2]+sp2[2]+1)>>1) + 1)>>1; + dp[3] = (dp[3] + ((uint32_t)(sp[3]+sp2[3]+1)>>1) + 1)>>1; + dp[4] = (dp[4] + ((uint32_t)(sp[4]+sp2[4]+1)>>1) + 1)>>1; + dp[5] = (dp[5] + ((uint32_t)(sp[5]+sp2[5]+1)>>1) + 1)>>1; + dp[6] = (dp[6] + ((uint32_t)(sp[6]+sp2[6]+1)>>1) + 1)>>1; + dp[7] = (dp[7] + ((uint32_t)(sp[7]+sp2[7]+1)>>1) + 1)>>1; + sp += lx2; sp2 += lx2; dp+= lx2; + } +} + +static inline void mrecvac(uint8_t *s, uint8_t *d, int lx,int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1; + dp[1] = (uint32_t)(s2+(s1=sp[2])+1)>>1; + dp[2] = (uint32_t)(s1+(s2=sp[3])+1)>>1; + dp[3] = (uint32_t)(s2+(s1=sp[4])+1)>>1; + dp[4] = (uint32_t)(s1+(s2=sp[5])+1)>>1; + dp[5] = (uint32_t)(s2+(s1=sp[6])+1)>>1; + dp[6] = (uint32_t)(s1+(s2=sp[7])+1)>>1; + dp[7] = (uint32_t)(s2+(s1=sp[8])+1)>>1; + dp[8] = (uint32_t)(s1+(s2=sp[9])+1)>>1; + dp[9] = (uint32_t)(s2+(s1=sp[10])+1)>>1; + dp[10] = (uint32_t)(s1+(s2=sp[11])+1)>>1; + dp[11] = (uint32_t)(s2+(s1=sp[12])+1)>>1; + dp[12] = (uint32_t)(s1+(s2=sp[13])+1)>>1; + dp[13] = (uint32_t)(s2+(s1=sp[14])+1)>>1; + dp[14] = (uint32_t)(s1+(s2=sp[15])+1)>>1; + dp[15] = (uint32_t)(s2+sp[16]+1)>>1; + sp += lx2; dp += lx2; + } +} + +static inline void mrech(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>1; + dp[1] = (uint32_t)(s2+(s1=sp[2])+1)>>1; + dp[2] = (uint32_t)(s1+(s2=sp[3])+1)>>1; + dp[3] = (uint32_t)(s2+(s1=sp[4])+1)>>1; + dp[4] = (uint32_t)(s1+(s2=sp[5])+1)>>1; + dp[5] = (uint32_t)(s2+(s1=sp[6])+1)>>1; + dp[6] = (uint32_t)(s1+(s2=sp[7])+1)>>1; + dp[7] = (uint32_t)(s2+sp[8]+1)>>1; + sp += lx2; dp += lx2; + } +} + +static inline void mrechc(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j> 1) + 1) >> 1; + dp[1] = (dp[1] + ((uint32_t)(s2 + (s1 = sp[2]) + 1) >> 1) + 1) >> 1; + dp[2] = (dp[2] + ((uint32_t)(s1 + (s2 = sp[3]) + 1) >> 1) + 1) >> 1; + dp[3] = (dp[3] + ((uint32_t)(s2 + (s1 = sp[4]) + 1) >> 1) + 1) >> 1; + dp[4] = (dp[4] + ((uint32_t)(s1 + (s2 = sp[5]) + 1) >> 1) + 1) >> 1; + dp[5] = (dp[5] + ((uint32_t)(s2 + (s1 = sp[6]) + 1) >> 1) + 1) >> 1; + dp[6] = (dp[6] + ((uint32_t)(s1 + (s2 = sp[7]) + 1) >> 1) + 1) >> 1; + dp[7] = (dp[7] + ((uint32_t)(s2 + (s1 = sp[8]) + 1) >> 1) + 1) >> 1; + dp[8] = (dp[8] + ((uint32_t)(s1 + (s2 = sp[9]) + 1) >> 1) + 1) >> 1; + dp[9] = (dp[9] + ((uint32_t)(s2 + (s1 = sp[10]) + 1) >> 1) + 1) >> 1; + dp[10] = (dp[10] + ((uint32_t)(s1 + (s2 = sp[11]) + 1) >> 1) + 1) >> 1; + dp[11] = (dp[11] + ((uint32_t)(s2 + (s1 = sp[12]) + 1) >> 1) + 1) >> 1; + dp[12] = (dp[12] + ((uint32_t)(s1 + (s2 = sp[13]) + 1) >> 1) + 1) >> 1; + dp[13] = (dp[13] + ((uint32_t)(s2 + (s1 = sp[14]) + 1) >> 1) + 1) >> 1; + dp[14] = (dp[14] + ((uint32_t)(s1 + (s2 = sp[15]) + 1) >> 1) + 1) >> 1; + dp[15] = (dp[15] + ((uint32_t)(s2 + sp[16] + 1) >> 1) + 1) >> 1; + sp += lx2; dp += lx2; + } +} + +static inline void mrecha(uint8_t *s, uint8_t *d,int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j> 1) + 1) >> 1; + dp[1] = (dp[1] + ((uint32_t)(s2 + (s1 = sp[2]) + 1) >> 1) + 1) >> 1; + dp[2] = (dp[2] + ((uint32_t)(s1 + (s2 = sp[3]) + 1) >> 1) + 1) >> 1; + dp[3] = (dp[3] + ((uint32_t)(s2 + (s1 = sp[4]) + 1) >> 1) + 1) >> 1; + dp[4] = (dp[4] + ((uint32_t)(s1 + (s2 = sp[5]) + 1) >> 1) + 1) >> 1; + dp[5] = (dp[5] + ((uint32_t)(s2 + (s1 = sp[6]) + 1) >> 1) + 1) >> 1; + dp[6] = (dp[6] + ((uint32_t)(s1 + (s2 = sp[7]) + 1) >> 1) + 1) >> 1; + dp[7] = (dp[7] + ((uint32_t)(s2 + sp[8] + 1) >> 1) + 1) >> 1; + sp += lx2; dp += lx2; + } +} + +static inline void mrechac(uint8_t *s, uint8_t *d, int lx2, int h) +{ + uint8_t *dp=d, *sp=s; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd1),mm7); + int j; for( j=0; j>2; + dp[1] = (uint32_t)(s2+(s1=sp[2])+s4+(s3=sp2[2])+2)>>2; + dp[2] = (uint32_t)(s1+(s2=sp[3])+s3+(s4=sp2[3])+2)>>2; + dp[3] = (uint32_t)(s2+(s1=sp[4])+s4+(s3=sp2[4])+2)>>2; + dp[4] = (uint32_t)(s1+(s2=sp[5])+s3+(s4=sp2[5])+2)>>2; + dp[5] = (uint32_t)(s2+(s1=sp[6])+s4+(s3=sp2[6])+2)>>2; + dp[6] = (uint32_t)(s1+(s2=sp[7])+s3+(s4=sp2[7])+2)>>2; + dp[7] = (uint32_t)(s2+(s1=sp[8])+s4+(s3=sp2[8])+2)>>2; + dp[8] = (uint32_t)(s1+(s2=sp[9])+s3+(s4=sp2[9])+2)>>2; + dp[9] = (uint32_t)(s2+(s1=sp[10])+s4+(s3=sp2[10])+2)>>2; + dp[10] = (uint32_t)(s1+(s2=sp[11])+s3+(s4=sp2[11])+2)>>2; + dp[11] = (uint32_t)(s2+(s1=sp[12])+s4+(s3=sp2[12])+2)>>2; + dp[12] = (uint32_t)(s1+(s2=sp[13])+s3+(s4=sp2[13])+2)>>2; + dp[13] = (uint32_t)(s2+(s1=sp[14])+s4+(s3=sp2[14])+2)>>2; + dp[14] = (uint32_t)(s1+(s2=sp[15])+s3+(s4=sp2[15])+2)>>2; + dp[15] = (uint32_t)(s2+sp[16]+s4+sp2[16]+2)>>2; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrec4(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd2),mm7); + int j; for( j=0; j>2; + dp[1] = (uint32_t)(s2+(s1=sp[2])+s4+(s3=sp2[2])+2)>>2; + dp[2] = (uint32_t)(s1+(s2=sp[3])+s3+(s4=sp2[3])+2)>>2; + dp[3] = (uint32_t)(s2+(s1=sp[4])+s4+(s3=sp2[4])+2)>>2; + dp[4] = (uint32_t)(s1+(s2=sp[5])+s3+(s4=sp2[5])+2)>>2; + dp[5] = (uint32_t)(s2+(s1=sp[6])+s4+(s3=sp2[6])+2)>>2; + dp[6] = (uint32_t)(s1+(s2=sp[7])+s3+(s4=sp2[7])+2)>>2; + dp[7] = (uint32_t)(s2+sp[8]+s4+sp2[8]+2)>>2; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrec4c(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd2),mm7); + int j; for( j=0; j>2) + 1)>>1; + dp[1] = (dp[1] + ((uint32_t)(s2+(s1=sp[2])+s4+(s3=sp2[2])+2)>>2) + 1)>>1; + dp[2] = (dp[2] + ((uint32_t)(s1+(s2=sp[3])+s3+(s4=sp2[3])+2)>>2) + 1)>>1; + dp[3] = (dp[3] + ((uint32_t)(s2+(s1=sp[4])+s4+(s3=sp2[4])+2)>>2) + 1)>>1; + dp[4] = (dp[4] + ((uint32_t)(s1+(s2=sp[5])+s3+(s4=sp2[5])+2)>>2) + 1)>>1; + dp[5] = (dp[5] + ((uint32_t)(s2+(s1=sp[6])+s4+(s3=sp2[6])+2)>>2) + 1)>>1; + dp[6] = (dp[6] + ((uint32_t)(s1+(s2=sp[7])+s3+(s4=sp2[7])+2)>>2) + 1)>>1; + dp[7] = (dp[7] + ((uint32_t)(s2+(s1=sp[8])+s4+(s3=sp2[8])+2)>>2) + 1)>>1; + dp[8] = (dp[8] + ((uint32_t)(s1+(s2=sp[9])+s3+(s4=sp2[9])+2)>>2) + 1)>>1; + dp[9] = (dp[9] + ((uint32_t)(s2+(s1=sp[10])+s4+(s3=sp2[10])+2)>>2) + 1)>>1; + dp[10] = (dp[10] + ((uint32_t)(s1+(s2=sp[11])+s3+(s4=sp2[11])+2)>>2) + 1)>>1; + dp[11] = (dp[11] + ((uint32_t)(s2+(s1=sp[12])+s4+(s3=sp2[12])+2)>>2) + 1)>>1; + dp[12] = (dp[12] + ((uint32_t)(s1+(s2=sp[13])+s3+(s4=sp2[13])+2)>>2) + 1)>>1; + dp[13] = (dp[13] + ((uint32_t)(s2+(s1=sp[14])+s4+(s3=sp2[14])+2)>>2) + 1)>>1; + dp[14] = (dp[14] + ((uint32_t)(s1+(s2=sp[15])+s3+(s4=sp2[15])+2)>>2) + 1)>>1; + dp[15] = (dp[15] + ((uint32_t)(s2+sp[16]+s4+sp2[16]+2)>>2) + 1)>>1; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrec4a(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd2),mm7); + int j; for( j=0; j>2) + 1)>>1; + dp[1] = (dp[1] + ((uint32_t)(s2+(s1=sp[2])+s4+(s3=sp2[2])+2)>>2) + 1)>>1; + dp[2] = (dp[2] + ((uint32_t)(s1+(s2=sp[3])+s3+(s4=sp2[3])+2)>>2) + 1)>>1; + dp[3] = (dp[3] + ((uint32_t)(s2+(s1=sp[4])+s4+(s3=sp2[4])+2)>>2) + 1)>>1; + dp[4] = (dp[4] + ((uint32_t)(s1+(s2=sp[5])+s3+(s4=sp2[5])+2)>>2) + 1)>>1; + dp[5] = (dp[5] + ((uint32_t)(s2+(s1=sp[6])+s4+(s3=sp2[6])+2)>>2) + 1)>>1; + dp[6] = (dp[6] + ((uint32_t)(s1+(s2=sp[7])+s3+(s4=sp2[7])+2)>>2) + 1)>>1; + dp[7] = (dp[7] + ((uint32_t)(s2+sp[8]+s4+sp2[8]+2)>>2) + 1)>>1; + sp += lx2; sp2 += lx2; dp += lx2; + } +} + +static inline void mrec4ac(uint8_t *s, uint8_t *d, int lx, int lx2, int h) +{ + uint8_t *dp=d, *sp=s, *sp2=s+lx; + pxor_r2r(mm0,mm0); + movq_m2r(m_(sadd2),mm7); + int j; for( j=0; j