Commit 6a4832ca, authored by James Almer, committed by Michael Niedermayer

x86/diracdsp: mark all functions as yasm

No inline asm dirac code remains in the tree, so replace every relevant check.
This also moves all the dirac functions from dsputil_mmx.c to diracdsp_mmx.c
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 64bf8455
...@@ -18,8 +18,10 @@ ...@@ -18,8 +18,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "libavutil/x86/cpu.h"
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "diracdsp_mmx.h" #include "diracdsp_mmx.h"
#include "fpel.h"
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
...@@ -56,14 +58,76 @@ HPEL_FILTER(16, sse2) ...@@ -56,14 +58,76 @@ HPEL_FILTER(16, sse2)
c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \ c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
/*
 * DIRAC_PIXOP(OPNAME2, OPNAME, EXT) expands to three wrapper functions:
 *   ff_<OPNAME2>_dirac_pixels8_<EXT>
 *   ff_<OPNAME2>_dirac_pixels16_<EXT>
 *   ff_<OPNAME2>_dirac_pixels32_<EXT>
 *
 * Each wrapper looks at the two low bits of h. When h is a multiple of 4 it
 * forwards src[0] to <OPNAME>_pixels8_<EXT> / <OPNAME>_pixels16_<EXT>;
 * otherwise it calls the matching ff_<OPNAME2>_dirac_pixelsN_c routine with
 * the full src[] array. The 32 variant issues two pixels16 calls, the second
 * one offset by 16 bytes in both dst and src[0].
 */
#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\
}\
void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\
}\
void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);\
        OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\
}
/* Declaration of ff_avg_pixels16_mmxext, the routine that the
 * DIRAC_PIXOP(avg, ff_avg, mmxext) expansion below calls by name. */
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/* Instantiate the 8/16/32 wrappers: put and avg for mmx, avg only for mmxext. */
DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext)
/*
 * Dispatch a Dirac "put" either to ff_put_pixels16_sse2() or to the C routine.
 *
 * When h is a multiple of 4, only src[0] is forwarded to the SSE2 routine.
 * Any other h is passed, together with the whole src[] array, to
 * ff_put_dirac_pixels16_c().
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_put_dirac_pixels16_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "avg" either to ff_avg_pixels16_sse2() or to the C routine.
 *
 * When h is a multiple of 4, only src[0] is forwarded to the SSE2 routine.
 * Any other h is passed, together with the whole src[] array, to
 * ff_avg_dirac_pixels16_c().
 */
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_avg_dirac_pixels16_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "put" either to two ff_put_pixels16_sse2() calls or to the
 * C routine.
 *
 * When h is a multiple of 4, ff_put_pixels16_sse2() is called once at
 * dst / src[0] and once more 16 bytes further along in both buffers.
 * Any other h is passed, together with the whole src[] array, to
 * ff_put_dirac_pixels32_c().
 */
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst,      src[0],      stride, h);
        ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
        return;
    }
    ff_put_dirac_pixels32_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "avg" either to two ff_avg_pixels16_sse2() calls or to the
 * C routine.
 *
 * When h is a multiple of 4, ff_avg_pixels16_sse2() is called once at
 * dst / src[0] and once more 16 bytes further along in both buffers.
 * Any other h is passed, together with the whole src[] array, to
 * ff_avg_dirac_pixels32_c().
 */
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
        ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
        return;
    }
    ff_avg_dirac_pixels32_c(dst, src, stride, h);
}
void ff_diracdsp_init_mmx(DiracDSPContext* c) void ff_diracdsp_init_mmx(DiracDSPContext* c)
{ {
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (!(mm_flags & AV_CPU_FLAG_MMX)) if (EXTERNAL_MMX(mm_flags)) {
return;
#if HAVE_YASM
c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
#if !ARCH_X86_64 #if !ARCH_X86_64
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
...@@ -72,33 +136,25 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c) ...@@ -72,33 +136,25 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
c->add_rect_clamped = ff_add_rect_clamped_mmx; c->add_rect_clamped = ff_add_rect_clamped_mmx;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx; c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
#endif #endif
#endif
#if HAVE_MMX_INLINE
PIXFUNC(put, 0, mmx); PIXFUNC(put, 0, mmx);
PIXFUNC(avg, 0, mmx); PIXFUNC(avg, 0, mmx);
#endif }
#if HAVE_MMXEXT_INLINE if (EXTERNAL_MMXEXT(mm_flags)) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
PIXFUNC(avg, 0, mmxext); PIXFUNC(avg, 0, mmxext);
} }
#endif
if (mm_flags & AV_CPU_FLAG_SSE2) { if (EXTERNAL_SSE2(mm_flags)) {
#if HAVE_YASM
c->dirac_hpel_filter = dirac_hpel_filter_sse2; c->dirac_hpel_filter = dirac_hpel_filter_sse2;
c->add_rect_clamped = ff_add_rect_clamped_sse2; c->add_rect_clamped = ff_add_rect_clamped_sse2;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2; c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
#endif
#if HAVE_SSE2_INLINE
c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2; c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2; c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
#endif
} }
} }
...@@ -29,9 +29,7 @@ ...@@ -29,9 +29,7 @@
#include "libavcodec/pixels.h" #include "libavcodec/pixels.h"
#include "libavcodec/videodsp.h" #include "libavcodec/videodsp.h"
#include "constants.h" #include "constants.h"
#include "diracdsp_mmx.h"
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "fpel.h"
#include "inline_asm.h" #include "inline_asm.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
...@@ -436,74 +434,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src, ...@@ -436,74 +434,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
} }
#endif #endif
#endif #endif
#if CONFIG_DIRAC_DECODER
/*
 * DIRAC_PIXOP(OPNAME2, OPNAME, EXT) expands to three wrapper functions:
 *   ff_<OPNAME2>_dirac_pixels8_<EXT>
 *   ff_<OPNAME2>_dirac_pixels16_<EXT>
 *   ff_<OPNAME2>_dirac_pixels32_<EXT>
 *
 * Each wrapper looks at the two low bits of h. When h is a multiple of 4 it
 * forwards src[0] to <OPNAME>_pixels8_<EXT> / <OPNAME>_pixels16_<EXT>;
 * otherwise it calls the matching ff_<OPNAME2>_dirac_pixelsN_c routine with
 * the full src[] array. The 32 variant issues two pixels16 calls, the second
 * one offset by 16 bytes in both dst and src[0].
 */
#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\
}\
void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\
}\
void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    if ((h & 3) == 0) {\
        OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);\
        OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);\
        return;\
    }\
    ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\
}
#if HAVE_YASM
/* Declaration of ff_avg_pixels16_mmxext, the routine that the
 * DIRAC_PIXOP(avg, ff_avg, mmxext) expansion below calls by name. */
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
/* Instantiate the 8/16/32 wrappers: put and avg for mmx, avg only for mmxext. */
DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext)
/*
 * Dispatch a Dirac "put" either to ff_put_pixels16_sse2() or to the C routine.
 *
 * When h is a multiple of 4, only src[0] is forwarded to the SSE2 routine.
 * Any other h is passed, together with the whole src[] array, to
 * ff_put_dirac_pixels16_c().
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_put_dirac_pixels16_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "avg" either to ff_avg_pixels16_sse2() or to the C routine.
 *
 * When h is a multiple of 4, only src[0] is forwarded to the SSE2 routine.
 * Any other h is passed, together with the whole src[] array, to
 * ff_avg_dirac_pixels16_c().
 */
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst, src[0], stride, h);
        return;
    }
    ff_avg_dirac_pixels16_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "put" either to two ff_put_pixels16_sse2() calls or to the
 * C routine.
 *
 * When h is a multiple of 4, ff_put_pixels16_sse2() is called once at
 * dst / src[0] and once more 16 bytes further along in both buffers.
 * Any other h is passed, together with the whole src[] array, to
 * ff_put_dirac_pixels32_c().
 */
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst,      src[0],      stride, h);
        ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
        return;
    }
    ff_put_dirac_pixels32_c(dst, src, stride, h);
}
/*
 * Dispatch a Dirac "avg" either to two ff_avg_pixels16_sse2() calls or to the
 * C routine.
 *
 * When h is a multiple of 4, ff_avg_pixels16_sse2() is called once at
 * dst / src[0] and once more 16 bytes further along in both buffers.
 * Any other h is passed, together with the whole src[] array, to
 * ff_avg_dirac_pixels32_c().
 */
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
        ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
        return;
    }
    ff_avg_dirac_pixels32_c(dst, src, stride, h);
}
#endif
#endif
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment