Reduce general-register demand of the x86 inline asm in vf_eq, vf_gradfun
and vf_noise.

What the hunks below do:
  * vf_eq.c, affine_1d_MMX: the value w >> 3 is stored in a local (wcount)
    and passed with a "g" constraint instead of "r".
  * vf_gradfun.c, filter_line_mmx2: thresh is passed as "g" instead of "rm".
  * vf_noise.c, lineNoiseAvg_MMX: src+mmx_len is passed as "g" and held in
    the frame-pointer register for the duration of the asm; the register is
    saved with push and restored with pop inside the asm.
  * vf_gradfun.c, BLURV: dc+width is passed as "g" and held in %ebp in the
    same way.

Fixes folded into this revision of the patch:
  * BLURV: dc_width is an intptr_t initialised from a pointer; the
    conversion is now an explicit cast instead of an implicit
    pointer-to-integer conversion.
  * lineNoiseAvg_MMX: push/pop are written without the "l" suffix so the
    operand size follows REG_BP instead of being forced to 32 bits.
  * lineNoiseAvg_MMX: operand %5 is loaded into REG_a before the push and
    before the frame-pointer register is overwritten. %5 is a "g" operand,
    so it may be a memory reference relative to the frame or stack pointer;
    reading it first keeps that reference valid.

NOTE(review): operands %0 (lineNoiseAvg_MMX) and %3 (BLURV) are also "g" and
  are read after the push. If the compiler addresses them relative to the
  stack pointer, the push shifts that address by one slot. Confirm with the
  build flags in use (frame pointer kept vs. omitted).
NOTE(review): BLURV hard-codes pushl/popl and %ebp, so it presumably only
  assembles for 32-bit targets; confirm that HAVE_SSE2 && HAVE_6REGS cannot
  select it on x86-64, or guard it.
NOTE(review): "g" also admits immediate operands. Confirm that the
  instructions consuming thresh (filter_line_mmx2) and wcount
  (affine_1d_MMX) accept one; they are outside the hunks shown here.
NOTE(review): indentation of the context lines, and the position of the
  blank context line in the first vf_noise.c hunk, were reconstructed for
  this copy. Apply with whitespace-insensitive matching (patch -l) or
  regenerate against the tree.

diff --git a/video/filter/vf_eq.c b/video/filter/vf_eq.c
index 1bca39b..3cba73b 100644
--- a/video/filter/vf_eq.c
+++ b/video/filter/vf_eq.c
@@ -133,6 +133,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
   int pel;
   short brvec[4];
   short contvec[4];
+  unsigned wcount = w >> 3;
 
 //  printf("\nmmx: src=%p dst=%p w=%d h=%d ds=%d ss=%d\n",src,dst,w,h,dstride,sstride);
 
@@ -170,7 +171,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
       "decl %%eax \n\t"
       "jnz 1b \n\t"
       : "=r" (src), "=r" (dst)
-      : "0" (src), "1" (dst), "r" (w >> 3), "r" (brvec), "r" (contvec)
+      : "0" (src), "1" (dst), "g" (wcount), "r" (brvec), "r" (contvec)
       : "%eax"
     );
 
diff --git a/video/filter/vf_gradfun.c b/video/filter/vf_gradfun.c
index f7b39fa..c4955ee 100644
--- a/video/filter/vf_gradfun.c
+++ b/video/filter/vf_gradfun.c
@@ -145,7 +145,7 @@ static void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc,
         "emms \n"
         :"+r"(x)
         :"r"(dst+width), "r"(src+width), "r"(dc+width/2),
-         "rm"(thresh), "m"(*dithers), "m"(*pw_7f)
+         "g"(thresh), "m"(*dithers), "m"(*pw_7f)
         :"memory"
     );
 }
diff --git a/video/filter/vf_noise.c b/video/filter/vf_noise.c
index c81a1d1..d8220be 100644
--- a/video/filter/vf_noise.c
+++ b/video/filter/vf_noise.c
@@ -216,13 +216,16 @@ static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int le
 #if HAVE_MMX
 static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){
     x86_reg mmx_len= len&(~7);
+    uint8_t *src_mmx_len = src+mmx_len;
 
     __asm__ volatile(
         "mov %5, %%"REG_a" \n\t"
+        "push %%"REG_BP" \n\t"
+        "mov %0, %%"REG_BP" \n\t"
         ".align 4 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm1 \n\t"
-        "movq (%0, %%"REG_a"), %%mm0 \n\t"
+        "movq (%%"REG_BP", %%"REG_a"), %%mm0 \n\t"
         "paddb (%2, %%"REG_a"), %%mm1 \n\t"
         "paddb (%3, %%"REG_a"), %%mm1 \n\t"
         "movq %%mm0, %%mm2 \n\t"
@@ -243,7 +246,9 @@ static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t
         "movq %%mm1, (%4, %%"REG_a") \n\t"
         "add $8, %%"REG_a" \n\t"
         " js 1b \n\t"
-        :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
+        "pop %%"REG_BP" \n\t"
+        :: "g" (src_mmx_len), "r" (shift[0]+mmx_len),
+           "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
            "r" (dst+mmx_len), "g" (-mmx_len)
         : "%"REG_a
     );
--- ./video/filter/vf_gradfun.c.orig 2013-11-25 10:55:18.192357135 +0000
+++ ./video/filter/vf_gradfun.c 2013-11-25 11:02:13.784535500 +0000
@@ -202,7 +202,10 @@
 #if HAVE_SSE2 && HAVE_6REGS
 #define BLURV(load)\
     intptr_t x = -2*width;\
+    intptr_t dc_width = (intptr_t)(dc+width); \
     __asm__ volatile(\
+        "pushl %%ebp \n"\
+        "mov %3, %%ebp \n"\
         "movdqa %6, %%xmm7 \n"\
         "1: \n"\
         load" (%4,%0), %%xmm0 \n"\
@@ -220,13 +223,14 @@
         "movdqa (%1,%0), %%xmm1 \n"\
         "movdqa %%xmm0, (%1,%0) \n"\
         "psubw %%xmm1, %%xmm0 \n"\
-        "movdqa %%xmm0, (%3,%0) \n"\
+        "movdqa %%xmm0, (%%ebp,%0) \n"\
         "add $16, %0 \n"\
         "jl 1b \n"\
+        "popl %%ebp \n"\
         :"+&r"(x)\
         :"r"(buf+width),\
          "r"(buf1+width),\
-         "r"(dc+width),\
+         "g"(dc_width),\
          "r"(src+width*2),\
          "r"(src+width*2+sstride),\
          "m"(*pw_ff)\