Commit 5e947aeb authored by Michael Niedermayer

sws/x86: improve rounding for yuv2yuvX

This tries to compensate for the errors introduced by
the rounding of pmulhw
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 5ad43af9
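
As a rough illustration of the compensation (a scalar sketch with invented helper names, not the code in this commit): pmulhw computes (a*b)>>16 with truncation toward minus infinity, so each multiply-accumulate tap can lose up to one unit. The patch seeds the per-pixel accumulator with the dither value plus (filterSize-1)*8 before the final >>4, i.e. roughly half a unit per tap.

/* Scalar sketch of the new accumulator seeding; helper names are invented
   for illustration and are not part of this commit. */
#include <stdint.h>

/* Per-lane behaviour of pmulhw: (a*b) >> 16, truncated toward -inf. */
static int16_t mulhw(int16_t a, int16_t b)
{
    return (int16_t)(((int32_t)a * (int32_t)b) >> 16);
}

/* Seed for one output pixel: zero-extend the dither byte, add
   (filterSize-1)*8, shift right by 4; the added bias offsets the
   downward rounding of the pmulhw taps by about 0.5 per tap. */
static int16_t seed_accumulator(uint8_t dither_val, int filterSize)
{
    int16_t bias = (int16_t)((filterSize - 1) << 3);  /* filterSize--; psllw $3 */
    return (int16_t)((dither_val + bias) >> 4);       /* paddw; psraw $4 */
}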
@@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
         :: "r"(dither)
         );
     }
+    filterSize--;
     __asm__ volatile(
         "pxor %%xmm0, %%xmm0\n\t"
         "punpcklbw %%xmm0, %%xmm3\n\t"
-        "psraw $4, %%xmm3\n\t"
+        "movd %0, %%xmm1\n\t"
+        "punpcklwd %%xmm1, %%xmm1\n\t"
+        "punpckldq %%xmm1, %%xmm1\n\t"
+        "punpcklqdq %%xmm1, %%xmm1\n\t"
+        "psllw $3, %%xmm1\n\t"
+        "paddw %%xmm1, %%xmm3\n\t"
+        "psraw $4, %%xmm3\n\t"
+        ::"m"(filterSize)
+        );
+    __asm__ volatile(
         "movdqa %%xmm3, %%xmm4\n\t"
         "movdqa %%xmm3, %%xmm7\n\t"
         "movl %3, %%ecx\n\t"
...
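
For reference, a hedged SSE2-intrinsics rendition of the setup block added above (the commit itself uses inline asm; the function name below is invented): movd plus the punpcklwd/punpckldq/punpcklqdq chain broadcasts the decremented filterSize to all eight 16-bit lanes, psllw $3 scales it by 8, paddw adds it to the unpacked dither words, and psraw $4 produces the accumulator seed.

/* Intrinsics sketch, not the patch itself; requires SSE2. */
#include <emmintrin.h>

static __m128i seed_dither_sse2(__m128i dither_bytes, int filterSize_minus_1)
{
    const __m128i zero = _mm_setzero_si128();
    __m128i d16  = _mm_unpacklo_epi8(dither_bytes, zero);     /* punpcklbw */
    __m128i bias = _mm_set1_epi16((short)filterSize_minus_1); /* movd + punpck* broadcast */
    bias = _mm_slli_epi16(bias, 3);                           /* psllw $3 */
    d16  = _mm_add_epi16(d16, bias);                          /* paddw */
    return _mm_srai_epi16(d16, 4);                            /* psraw $4 */
}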
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
                              const uint8_t *dither, int offset)
 {
     dither_8to16(dither, offset);
-    __asm__ volatile(\
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw $3, %%mm1\n\t"
+        "paddw %%mm1, %%mm3\n\t"
+        "paddw %%mm1, %%mm4\n\t"
         "psraw $4, %%mm3\n\t"
         "psraw $4, %%mm4\n\t"
+        ::"m"(filterSize)
+        );
+    __asm__ volatile(\
         "movq %%mm3, %%mm6\n\t"
         "movq %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"
...