Commit 2999bd7d, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: xvid_idct: port SSE2 iDCT to yasm

The main differences are that labels are now named properly and that
yasm is left to select the GPRs used for skipping 1D transforms.
Previous-version-reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 8f8c31f4
...@@ -73,8 +73,7 @@ MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o ...@@ -73,8 +73,7 @@ MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
MMX-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct.o MMX-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct.o
# decoders/encoders # decoders/encoders
MMX-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_mmx.o \ MMX-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_mmx.o
x86/xvididct_sse2.o
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
...@@ -141,6 +140,7 @@ YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \ ...@@ -141,6 +140,7 @@ YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
x86/hevc_res_add.o \ x86/hevc_res_add.o \
x86/hevc_sao.o x86/hevc_sao.o
YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
......
...@@ -67,9 +67,9 @@ static const struct algo idct_tab_arch[] = { ...@@ -67,9 +67,9 @@ static const struct algo idct_tab_arch[] = {
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
{ "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 }, { "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 },
#endif #endif
#if HAVE_SSE2_INLINE #if HAVE_SSE2_EXTERNAL
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
#if ARCH_X86_64 && HAVE_YASM #if ARCH_X86_64
{ "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 }, { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
#endif #endif
#endif #endif
......
This diff is collapsed.
...@@ -26,6 +26,18 @@ ...@@ -26,6 +26,18 @@
#include "idctdsp.h" #include "idctdsp.h"
#include "xvididct.h" #include "xvididct.h"
/**
 * idct_put wrapper built on the external SSE2 Xvid iDCT.
 *
 * Calls ff_xvid_idct_sse2() on block, then passes that same block to
 * ff_put_pixels_clamped() together with dest and line_size.
 * NOTE(review): presumably the iDCT transforms block in place, so the
 * second call sees the reconstructed samples -- confirm against xvididct.h.
 *
 * @param dest      destination pixel buffer handed to ff_put_pixels_clamped()
 * @param line_size line size forwarded unchanged to ff_put_pixels_clamped()
 * @param block     coefficient block processed by ff_xvid_idct_sse2()
 */
static void xvid_idct_sse2_put(uint8_t *dest, int line_size, short *block)
{
    /* Step 1: inverse transform. */
    ff_xvid_idct_sse2(block);

    /* Step 2: write the result to the destination. */
    ff_put_pixels_clamped(block, dest, line_size);
}
/**
 * idct_add wrapper built on the external SSE2 Xvid iDCT.
 *
 * Calls ff_xvid_idct_sse2() on block, then passes that same block to
 * ff_add_pixels_clamped() together with dest and line_size.
 * NOTE(review): presumably the iDCT transforms block in place and the
 * result is accumulated onto dest -- confirm against xvididct.h / idctdsp.h.
 *
 * @param dest      destination pixel buffer handed to ff_add_pixels_clamped()
 * @param line_size line size forwarded unchanged to ff_add_pixels_clamped()
 * @param block     coefficient block processed by ff_xvid_idct_sse2()
 */
static void xvid_idct_sse2_add(uint8_t *dest, int line_size, short *block)
{
    /* Step 1: inverse transform. */
    ff_xvid_idct_sse2(block);

    /* Step 2: combine the result with the destination. */
    ff_add_pixels_clamped(block, dest, line_size);
}
av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth) unsigned high_bit_depth)
{ {
...@@ -50,9 +62,9 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, ...@@ -50,9 +62,9 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
c->perm_type = FF_IDCT_PERM_NONE; c->perm_type = FF_IDCT_PERM_NONE;
} }
if (INLINE_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->idct_put = ff_xvid_idct_sse2_put; c->idct_put = xvid_idct_sse2_put;
c->idct_add = ff_xvid_idct_sse2_add; c->idct_add = xvid_idct_sse2_add;
c->idct = ff_xvid_idct_sse2; c->idct = ff_xvid_idct_sse2;
c->perm_type = FF_IDCT_PERM_SSE2; c->perm_type = FF_IDCT_PERM_SSE2;
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment