Commit 12004a9a authored by Anton Khirnov

audiodsp/x86: yasmify vector_clipf_sse

parent 683da86a
...@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o ...@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
# GCC inline assembly optimizations # GCC inline assembly optimizations
# subsystems # subsystems
MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o x86/hpeldsp_mmx.o
......
...@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 ...@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
%else %else
VECTOR_CLIP_INT32 6, 1, 0, 0 VECTOR_CLIP_INT32 6, 1, 0, 0
%endif %endif
;-----------------------------------------------------------------------------
; void ff_vector_clipf_sse(float *dst, const float *src,
;                          int len, float min, float max)
;
; Clamps every float read from src to the range [min, max] and stores the
; result at the same index in dst.
;
; Each pass of the loop handles 4 * mmsize bytes (16 floats with XMM
; registers), walking from the end of the buffers towards the start. The
; loop body runs before the counter is tested, so len is expected to be a
; positive multiple of 16. mova is the aligned move, so src and dst are
; expected to be mmsize-aligned.
;
; Register roles: m0 = min in every lane, m1 = max in every lane,
;                 m2-m5 = the four data vectors of the current pass.
;-----------------------------------------------------------------------------
INIT_XMM sse
cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
    ; Splat the two scalar bounds across all lanes. Where they live on entry
    ; depends on the calling convention.
%if ARCH_X86_32
    VBROADCASTSS m0, minm       ; both bounds are read from argument memory
    VBROADCASTSS m1, maxm
%elif WIN64
    VBROADCASTSS m0, m3         ; min is the 4th argument -> xmm3
    VBROADCASTSS m1, maxm       ; max is the 5th argument -> memory
%else ; 64bit sysv
    VBROADCASTSS m0, m0         ; min is the 1st float argument -> xmm0
    VBROADCASTSS m1, m1         ; max is the 2nd float argument -> xmm1
%endif

    ; len is a 32-bit int; widen it so it can be used in addressing.
    movsxdifnidn lenq, lend

.loop:
    ; Load the last 16 not-yet-processed floats (lenq counts floats left).
    mova    m2, [srcq + 4 * lenq - 4 * mmsize]
    mova    m3, [srcq + 4 * lenq - 3 * mmsize]
    mova    m4, [srcq + 4 * lenq - 2 * mmsize]
    mova    m5, [srcq + 4 * lenq - 1 * mmsize]

    ; Raise anything below min up to min ...
    maxps   m2, m0
    maxps   m3, m0
    maxps   m4, m0
    maxps   m5, m0

    ; ... then lower anything above max down to max.
    minps   m2, m1
    minps   m3, m1
    minps   m4, m1
    minps   m5, m1

    mova    [dstq + 4 * lenq - 4 * mmsize], m2
    mova    [dstq + 4 * lenq - 3 * mmsize], m3
    mova    [dstq + 4 * lenq - 2 * mmsize], m4
    mova    [dstq + 4 * lenq - 1 * mmsize], m5

    ; mmsize (16) floats were consumed: 4 vectors * 4 floats each.
    sub     lenq, mmsize
    jg      .loop

    RET
...@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) ...@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
if (EXTERNAL_MMXEXT(cpu_flags)) if (EXTERNAL_MMXEXT(cpu_flags))
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
if (INLINE_SSE(cpu_flags)) if (EXTERNAL_SSE(cpu_flags))
c->vector_clipf = ff_vector_clipf_sse; c->vector_clipf = ff_vector_clipf_sse;
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
......
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/x86/asm.h"
#include "audiodsp.h"
#if HAVE_INLINE_ASM
/*
 * Clamp every float in src to the range [min, max] and store the result at
 * the same index in dst, using SSE inline assembly.
 *
 * The loop handles 16 floats (64 bytes) per pass, starting with the last 16
 * and moving towards the start of the buffers. The body runs once before the
 * counter is tested, so len is presumably always a positive multiple of 16 --
 * confirm against callers. movaps faults on addresses that are not 16-byte
 * aligned, so dst and src must be 16-byte aligned.
 *
 * NOTE(review): the asm writes xmm0-xmm5 but the clobber list only names
 * "memory"; confirm whether the XMM registers should be declared as clobbered.
 */
void ff_vector_clipf_sse(float *dst, const float *src,
int len, float min, float max)
{
/* Byte offset of the last group of 16 floats. */
x86_reg i = (len - 16) * 4;
__asm__ volatile (
/* xmm4 = min in all four lanes, xmm5 = max in all four lanes. */
"movss %3, %%xmm4 \n\t"
"movss %4, %%xmm5 \n\t"
"shufps $0, %%xmm4, %%xmm4 \n\t"
"shufps $0, %%xmm5, %%xmm5 \n\t"
"1: \n\t"
/* Load 16 floats from src at byte offset i. */
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
"movaps 16(%2, %0), %%xmm1 \n\t"
"movaps 32(%2, %0), %%xmm2 \n\t"
"movaps 48(%2, %0), %%xmm3 \n\t"
/* Raise values below min up to min. */
"maxps %%xmm4, %%xmm0 \n\t"
"maxps %%xmm4, %%xmm1 \n\t"
"maxps %%xmm4, %%xmm2 \n\t"
"maxps %%xmm4, %%xmm3 \n\t"
/* Lower values above max down to max. */
"minps %%xmm5, %%xmm0 \n\t"
"minps %%xmm5, %%xmm1 \n\t"
"minps %%xmm5, %%xmm2 \n\t"
"minps %%xmm5, %%xmm3 \n\t"
/* Store the clamped floats to dst at the same byte offset. */
"movaps %%xmm0, (%1, %0) \n\t"
"movaps %%xmm1, 16(%1, %0) \n\t"
"movaps %%xmm2, 32(%1, %0) \n\t"
"movaps %%xmm3, 48(%1, %0) \n\t"
/* Step back 64 bytes; keep looping while the offset is still >= 0. */
"sub $64, %0 \n\t"
"jge 1b \n\t"
/* %0 = i (read/write, early-clobber), %1 = dst, %2 = src, %3 = min, %4 = max */
: "+&r" (i)
: "r" (dst), "r" (src), "m" (min), "m" (max)
: "memory");
}
#endif /* HAVE_INLINE_ASM */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment