Commit e0c20567 authored by Ronald S. Bultje

x86/simple_idct: add explicit sse2 simple_idct_put/add versions.

These use the mmx IDCT, but sse2 put/add_pixels_clamped implementations.
This way we don't need to use the ff_put/add_pixels_clamped function
pointers.
parent 2f0591cf
...@@ -63,6 +63,11 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, ...@@ -63,6 +63,11 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (INLINE_MMX(cpu_flags)) { if (INLINE_MMX(cpu_flags)) {
if (!high_bit_depth && if (!high_bit_depth &&
avctx->lowres == 0 && avctx->lowres == 0 &&
...@@ -75,15 +80,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, ...@@ -75,15 +80,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
c->perm_type = FF_IDCT_PERM_SIMPLE; c->perm_type = FF_IDCT_PERM_SIMPLE;
} }
} }
if (EXTERNAL_MMX(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2; c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2; c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
if (INLINE_SSE2(cpu_flags)) {
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
c->idct_put = ff_simple_idct_put_sse2;
c->idct_add = ff_simple_idct_add_sse2;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
}
} }
if (ARCH_X86_64 && avctx->lowres == 0) { if (ARCH_X86_64 && avctx->lowres == 0) {
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavcodec/idctdsp.h" #include "libavcodec/idctdsp.h"
#include "libavcodec/x86/idctdsp.h"
#include "idctdsp.h" #include "idctdsp.h"
#include "simple_idct.h" #include "simple_idct.h"
...@@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block) ...@@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block) void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{ {
idct(block); idct(block);
ff_put_pixels_clamped(block, dest, line_size); ff_put_pixels_clamped_mmx(block, dest, line_size);
} }
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block) void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{ {
idct(block); idct(block);
ff_add_pixels_clamped(block, dest, line_size); ff_add_pixels_clamped_mmx(block, dest, line_size);
}
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
idct(block);
ff_put_pixels_clamped_sse2(block, dest, line_size);
}
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
idct(block);
ff_add_pixels_clamped_sse2(block, dest, line_size);
} }
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
...@@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block); ...@@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block);
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct10_sse2(int16_t *block); void ff_simple_idct10_sse2(int16_t *block);
void ff_simple_idct10_avx(int16_t *block); void ff_simple_idct10_avx(int16_t *block);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment