Commit f4c1a484, authored by James Almer

x86/intmath: add sse optimized av_clipf and av_clipd

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 9f17d4ae
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#define AVUTIL_X86_INTMATH_H #define AVUTIL_X86_INTMATH_H
#include <stdint.h> #include <stdint.h>
#include <stdlib.h>
#if HAVE_FAST_CLZ #if HAVE_FAST_CLZ
#if defined(_MSC_VER) #if defined(_MSC_VER)
#include <intrin.h> #include <intrin.h>
...@@ -98,6 +99,38 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne ...@@ -98,6 +99,38 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne
#endif /* __BMI2__ */ #endif /* __BMI2__ */
#if defined(__SSE2__)
#define av_clipd av_clipd_sse2
/**
 * Clip a double-precision value into the range [amin, amax] using the SSE2
 * scalar minsd/maxsd instructions.
 *
 * @param a    value to clip
 * @param amin lower bound of the clipping range
 * @param amax upper bound of the clipping range
 * @return a limited to [amin, amax]
 *
 * When ASSERT_LEVEL is defined and >= 2, the call aborts if amin > amax.
 */
static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
{
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
    if (amin > amax) abort();
#endif
    /* %0 is overwritten by minsd before maxsd reads %1, so the output must
     * be marked early-clobber ("&"); without it the compiler is free to
     * allocate amin to the same register as a. */
    __asm__ ("minsd %2, %0 \n\t"
             "maxsd %1, %0 \n\t"
             : "+&x"(a) : "xm"(amin), "xm"(amax));
    return a;
}
#endif /* __SSE2__ */
#if defined(__SSE__)
#define av_clipf av_clipf_sse
/**
 * Clip a single-precision value into the range [amin, amax] using the SSE
 * scalar minss/maxss instructions.
 *
 * @param a    value to clip
 * @param amin lower bound of the clipping range
 * @param amax upper bound of the clipping range
 * @return a limited to [amin, amax]
 *
 * When ASSERT_LEVEL is defined and >= 2, the call aborts if amin > amax.
 */
static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
{
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
    if (amin > amax) abort();
#endif
    /* %0 is overwritten by minss before maxss reads %1, so the output must
     * be marked early-clobber ("&"); without it the compiler is free to
     * allocate amin to the same register as a. */
    __asm__ ("minss %2, %0 \n\t"
             "maxss %1, %0 \n\t"
             : "+&x"(a) : "xm"(amin), "xm"(amax));
    return a;
}
#endif /* __SSE__ */
#endif /* __GNUC__ */ #endif /* __GNUC__ */
#endif /* AVUTIL_X86_INTMATH_H */ #endif /* AVUTIL_X86_INTMATH_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment