Commit 35ef9801 authored by Michael Niedermayer

Merge commit '93280623'

* commit '93280623':
  x86: dsputil: Move avg_pixels16_mmx() out of rnd_template.c
  x86: dsputil: Move avg_pixels8_mmx() out of rnd_template.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents ed1697ff 93280623
...@@ -1086,7 +1086,7 @@ void ff_put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) ...@@ -1086,7 +1086,7 @@ void ff_put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) void ff_avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels8_mmx(dst, src, stride, 8); ff_avg_pixels8_mmx(dst, src, stride, 8);
} }
void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
...@@ -1096,7 +1096,7 @@ void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) ...@@ -1096,7 +1096,7 @@ void ff_put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) void ff_avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels16_mmx(dst, src, stride, 16); ff_avg_pixels16_mmx(dst, src, stride, 16);
} }
/* VC-1-specific */ /* VC-1-specific */
...@@ -1134,7 +1134,7 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[ ...@@ -1134,7 +1134,7 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
DIRAC_PIXOP(put, ff_put, mmx) DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, avg, mmx) DIRAC_PIXOP(avg, ff_avg, mmx)
#endif #endif
#if HAVE_YASM #if HAVE_YASM
......
...@@ -156,6 +156,10 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_s ...@@ -156,6 +156,10 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_s
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size); void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
......
...@@ -29,6 +29,51 @@ ...@@ -29,6 +29,51 @@
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
// in case more speed is needed - unroling would certainly help
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm__ volatile(
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
/*
 * Combine a 16-byte-wide column of h rows from pixels into block, in place.
 *
 * Each row is processed as two 8-byte halves: bytes 0..7 and bytes 8..15.
 * For each half, 8 bytes are loaded from block and from pixels, passed
 * through PAVGB_MMX, and the result is stored back to block. Both pointers
 * then advance by line_size bytes.
 *
 * block     - destination; read and overwritten row by row
 * pixels    - source; only read
 * line_size - byte distance between consecutive rows, applied to both pointers
 * h         - number of rows; the do/while form runs the body before testing
 *             --h, so h <= 0 is not handled and callers must pass h >= 1
 *
 * NOTE(review): MOVQ_BFE, JUMPALIGN and PAVGB_MMX are defined outside this
 * view. PAVGB_MMX presumably computes a per-byte rounded average using the
 * constant that MOVQ_BFE places in mm6 - confirm against their definitions.
 */
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
// Set up mm6 once, outside the loop; it is passed to PAVGB_MMX for each half.
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm__ volatile(
// First half: bytes 0..7 of the destination and source rows.
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
// Second half: bytes 8..15. Prefixing the operand with 8 adds a displacement
// of 8; this relies on the operand being emitted as a plain memory reference.
"movq 8%0, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, 8%0 \n\t"
// The operands name only *block / *pixels (one byte each); the "memory"
// clobber tells the compiler the asm touches memory beyond them.
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h) ptrdiff_t line_size, int h)
{ {
......
...@@ -74,8 +74,11 @@ void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, ...@@ -74,8 +74,11 @@ void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
#define avg_pixels8_mmx ff_avg_pixels8_mmx
#define avg_pixels16_mmx ff_avg_pixels16_mmx
#define put_pixels8_mmx ff_put_pixels8_mmx #define put_pixels8_mmx ff_put_pixels8_mmx
#define put_pixels16_mmx ff_put_pixels16_mmx #define put_pixels16_mmx ff_put_pixels16_mmx
#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
#define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx #define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
......
...@@ -92,51 +92,6 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff ...@@ -92,51 +92,6 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff
} }
// avg_pixels // avg_pixels
#ifndef NO_RND
/*
 * Template version of the 8-byte-wide "avg" operation; compiled only when
 * NO_RND is not defined.
 *
 * For each of h rows, 8 bytes are loaded from block and from pixels, passed
 * through OP_AVG, and the result is stored back to block. Both pointers then
 * advance by line_size bytes. The do/while form runs the body before testing
 * --h, so h <= 0 is not handled.
 *
 * NOTE(review): DEF, OP_AVG, MOVQ_BFE and JUMPALIGN are defined outside this
 * view. DEF(avg, pixels8) presumably expands to a per-variant function name
 * and OP_AVG to the averaging sequence chosen by the including file -
 * confirm against the includer.
 */
// in case more speed is needed - unrolling would certainly help
static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
// Set up mm6 once, outside the loop; it is passed to OP_AVG on every row.
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm__ volatile(
// mm0 = 8 bytes of the destination row, mm1 = 8 bytes of the source row
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
// write the combined row back to the destination
"movq %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
#endif /* NO_RND */
/*
 * Template version of the 16-byte-wide "avg" operation.
 *
 * Each of the h rows is processed as two 8-byte halves (bytes 0..7, then
 * bytes 8..15): load from block and pixels, pass through OP_AVG, store the
 * result back to block. Both pointers then advance by line_size bytes. The
 * do/while form runs the body before testing --h, so h <= 0 is not handled.
 *
 * NOTE(review): DEF, OP_AVG, MOVQ_BFE and JUMPALIGN are defined outside this
 * view. DEF(avg, pixels16) presumably expands to a per-variant function name
 * and OP_AVG to the averaging sequence chosen by the including file -
 * confirm against the includer.
 */
static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
// Set up mm6 once, outside the loop; it is passed to OP_AVG for each half.
MOVQ_BFE(mm6);
JUMPALIGN();
do {
__asm__ volatile(
// First half: bytes 0..7 of the destination and source rows.
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
// Second half: bytes 8..15, addressed by prefixing the operands with 8.
"movq 8%0, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
OP_AVG(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
pixels += line_size;
block += line_size;
}
while (--h);
}
// this routine is 'slightly' suboptimal but mostly unused // this routine is 'slightly' suboptimal but mostly unused
static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment