Commit 86ae0da6 authored by Christophe Gisquet, committed by Michael Niedermayer

x86: hpeldsp: propagate changes across codecs

Some codecs still use mmx versions, so have them use the versions
with newer instruction sets.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 96470ca2
...@@ -474,6 +474,12 @@ static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ...@@ -474,6 +474,12 @@ static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src,
ff_avg_pixels8_mmx(dst, src, stride, 8); ff_avg_pixels8_mmx(dst, src, stride, 8);
} }
/*
 * mc00 entry for the 8-wide CAVS qpel "avg" table, mmxext flavour.
 * Adapts the three-argument qpel signature to ff_avg_pixels8_mmxext()
 * by supplying a fixed height of 8 rows.
 */
static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride)
{
    const int rows = 8;

    ff_avg_pixels8_mmxext(dst, src, stride, rows);
}
static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride) ptrdiff_t stride)
{ {
...@@ -485,6 +491,24 @@ static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ...@@ -485,6 +491,24 @@ static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src,
{ {
ff_avg_pixels16_mmx(dst, src, stride, 16); ff_avg_pixels16_mmx(dst, src, stride, 16);
} }
/*
 * mc00 entry for the 16-wide CAVS qpel "avg" table, mmxext flavour.
 * Adapts the three-argument qpel signature to ff_avg_pixels16_mmxext()
 * by supplying a fixed height of 16 rows.
 */
static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src,
                                        ptrdiff_t stride)
{
    const int rows = 16;

    ff_avg_pixels16_mmxext(dst, src, stride, rows);
}
/*
 * mc00 entry for the 16-wide CAVS qpel "put" table, SSE2 flavour.
 * Adapts the three-argument qpel signature to ff_put_pixels16_sse2()
 * by supplying a fixed height of 16 rows.
 */
static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src,
                                      ptrdiff_t stride)
{
    const int rows = 16;

    ff_put_pixels16_sse2(dst, src, stride, rows);
}
/*
 * mc00 entry for the 16-wide CAVS qpel "avg" table, SSE2 flavour.
 * Adapts the three-argument qpel signature to ff_avg_pixels16_sse2()
 * by supplying a fixed height of 16 rows.
 */
static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src,
                                      ptrdiff_t stride)
{
    const int rows = 16;

    ff_avg_pixels16_sse2(dst, src, stride, rows);
}
#endif #endif
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
...@@ -517,15 +541,6 @@ CAVS_MC(put_, 8, mmxext) ...@@ -517,15 +541,6 @@ CAVS_MC(put_, 8, mmxext)
CAVS_MC(put_, 16, mmxext) CAVS_MC(put_, 16, mmxext)
CAVS_MC(avg_, 8, mmxext) CAVS_MC(avg_, 8, mmxext)
CAVS_MC(avg_, 16, mmxext) CAVS_MC(avg_, 16, mmxext)
/*
 * Register the mmxext CAVS qpel routines on the DSP context by expanding
 * DSPFUNC once per (operation, size) pair: put/avg for 16 and 8.
 *
 * NOTE(review): the DSPFUNC definition is not visible here; its arguments
 * are presumably (operation, table index, block size, CPU suffix) and it
 * presumably writes into `c` -- confirm against the macro.
 * `avctx` is not referenced directly in this body.
 */
static av_cold void cavsdsp_init_mmxext(CAVSDSPContext *c,
AVCodecContext *avctx)
{
DSPFUNC(put, 0, 16, mmxext);
DSPFUNC(put, 1, 8, mmxext);
DSPFUNC(avg, 0, 16, mmxext);
DSPFUNC(avg, 1, 8, mmxext);
}
#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_MMXEXT_INLINE */
#if HAVE_AMD3DNOW_INLINE #if HAVE_AMD3DNOW_INLINE
...@@ -557,7 +572,23 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) ...@@ -557,7 +572,23 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
cavsdsp_init_3dnow(c, avctx); cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_AMD3DNOW_INLINE */ #endif /* HAVE_AMD3DNOW_INLINE */
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
if (INLINE_MMXEXT(cpu_flags)) if (INLINE_MMXEXT(cpu_flags)) {
cavsdsp_init_mmxext(c, avctx); DSPFUNC(put, 0, 16, mmxext);
#endif /* HAVE_MMXEXT_INLINE */ DSPFUNC(put, 1, 8, mmxext);
DSPFUNC(avg, 0, 16, mmxext);
DSPFUNC(avg, 1, 8, mmxext);
}
#endif
#if HAVE_MMX_EXTERNAL
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
}
#endif
#if HAVE_SSE2_EXTERNAL
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
}
#endif
} }
...@@ -83,9 +83,6 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[ ...@@ -83,9 +83,6 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[
}\ }\
} }
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
DIRAC_PIXOP(put, ff_put, mmx) DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx) DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext) DIRAC_PIXOP(avg, ff_avg, mmxext)
......
...@@ -115,9 +115,6 @@ void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, ...@@ -115,9 +115,6 @@ void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx #define ff_put_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#define QPEL_OP(OPNAME, RND, MMX) \ #define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \ ptrdiff_t stride) \
......
...@@ -28,6 +28,8 @@ void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ...@@ -28,6 +28,8 @@ void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
......
...@@ -53,9 +53,6 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, ...@@ -53,9 +53,6 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx #define ff_put_pixels8_mmxext ff_put_pixels8_mmx
#define ff_put_pixels4_mmxext ff_put_pixels4_mmx #define ff_put_pixels4_mmxext ff_put_pixels4_mmx
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#define DEF_QPEL(OPNAME)\ #define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
......
...@@ -27,12 +27,27 @@ void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, ...@@ -27,12 +27,27 @@ void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#endif /* AVCODEC_X86_HPELDSP_H */ #endif /* AVCODEC_X86_HPELDSP_H */
...@@ -48,10 +48,6 @@ void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ...@@ -48,10 +48,6 @@ void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
...@@ -86,8 +82,6 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ...@@ -86,8 +82,6 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
...@@ -95,15 +89,6 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, ...@@ -95,15 +89,6 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#define avg_pixels8_mmx ff_avg_pixels8_mmx #define avg_pixels8_mmx ff_avg_pixels8_mmx
#define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx #define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx
#define avg_pixels16_mmx ff_avg_pixels16_mmx #define avg_pixels16_mmx ff_avg_pixels16_mmx
......
...@@ -32,6 +32,13 @@ ...@@ -32,6 +32,13 @@
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
#include "hpeldsp.h" #include "hpeldsp.h"
/*
 * DEFINE_FN(op, size, insn) emits a static wrapper named
 *     <op>_rv40_qpel<size>_mc33_<insn>(dst, src, stride)
 * that calls
 *     ff_<op>_pixels<size>_xy2_<insn>(dst, src, stride, size)
 * i.e. it reuses `size` as the height argument of the xy2 pixel routine.
 * The matching ff_* prototype must be visible where the macro is expanded.
 */
#define DEFINE_FN(op, size, insn) \
static void op##_rv40_qpel##size##_mc33_##insn(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_##op##_pixels##size##_xy2_##insn(dst, src, stride, size); \
}
#if HAVE_YASM #if HAVE_YASM
void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y); int stride, int h, int x, int y);
...@@ -186,30 +193,24 @@ QPEL_FUNCS_SET (OP, 3, 1, OPT) \ ...@@ -186,30 +193,24 @@ QPEL_FUNCS_SET (OP, 3, 1, OPT) \
QPEL_FUNCS_SET (OP, 3, 2, OPT) QPEL_FUNCS_SET (OP, 3, 2, OPT)
/** @} */ /** @} */
DEFINE_FN(put, 8, ssse3)
DEFINE_FN(put, 16, sse2)
DEFINE_FN(put, 16, ssse3)
DEFINE_FN(avg, 8, mmxext)
DEFINE_FN(avg, 8, ssse3)
DEFINE_FN(avg, 16, sse2)
DEFINE_FN(avg, 16, ssse3)
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
static void put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, DEFINE_FN(put, 8, mmx)
ptrdiff_t stride) DEFINE_FN(avg, 8, mmx)
{ DEFINE_FN(put, 16, mmx)
ff_put_pixels8_xy2_mmx(dst, src, stride, 8); DEFINE_FN(avg, 16, mmx)
} #endif
static void put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
ff_put_pixels16_xy2_mmx(dst, src, stride, 16);
}
static void avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
ff_avg_pixels8_xy2_mmx(dst, src, stride, 8);
}
static void avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
ff_avg_pixels16_xy2_mmx(dst, src, stride, 16);
}
#endif /* HAVE_MMX_INLINE */
av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
{ {
...@@ -240,6 +241,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) ...@@ -240,6 +241,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
#endif #endif
} }
if (EXTERNAL_MMXEXT(cpu_flags)) { if (EXTERNAL_MMXEXT(cpu_flags)) {
c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_mmxext;
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmxext; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmxext;
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmxext; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmxext;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext;
...@@ -251,6 +253,8 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) ...@@ -251,6 +253,8 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
#endif #endif
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_sse2;
c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_sse2;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
...@@ -259,6 +263,10 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) ...@@ -259,6 +263,10 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
QPEL_MC_SET(avg_, _sse2) QPEL_MC_SET(avg_, _sse2)
} }
if (EXTERNAL_SSSE3(cpu_flags)) { if (EXTERNAL_SSSE3(cpu_flags)) {
c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_ssse3;
c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_ssse3;
c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_ssse3;
c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_ssse3;
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment