Commit 6e76e6a0 authored by Michael Niedermayer

Merge commit 'b545179f'

* commit 'b545179f':
  x86: lpc: simd av_evaluate_lls

Conflicts:
	libavutil/x86/lls.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents a285079b b545179f
...@@ -196,3 +196,40 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2 ...@@ -196,3 +196,40 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
.ret: .ret:
REP_RET REP_RET
%endif %endif
;-----------------------------------------------------------------------------
; double ff_evaluate_lls_sse2(LLSModel *m, double *var, int order)
;
; Accumulates var[k] * coefs[k], where coefs points at
; ctx + LLSModel.coeff + order*MAX_VARS*8.
;   - elements 0 and 1 are multiplied up front,
;   - the loop then consumes pairs starting at element 2,
;   - if the loop counter lands exactly on zero, one more scalar element
;     (index `order`) is added.
; The pair loop is entered unconditionally, so elements 2 and 3 are always
; read. NOTE(review): this presumably relies on callers passing order >= 3 --
; confirm against the C-side dispatch.
;
; Result: low double of m0; on x86_32 it is additionally loaded onto the x87
; stack, going through the r0m argument slot as scratch memory.
;-----------------------------------------------------------------------------
INIT_XMM sse2
; `order` is read below, so it has to be counted in the argument total (3):
; on x86_32 cglobal only loads the declared number of arguments from the stack.
cglobal evaluate_lls, 3,4,2, ctx, var, order, i
    ; This function is often called on the same buffer as update_lls, but with
    ; an offset. They can't both be aligned.
    ; Load halves rather than movu to avoid store-forwarding stalls, since the
    ; input was initialized immediately prior to this function using scalar math.
    %define coefsq ctxq                     ; ctx is only needed to derive coefs
    mov     id, orderd                      ; i = order (32-bit write clears the upper half)
    imul    orderd, MAX_VARS                ; element offset of the selected coefficient row
    lea     coefsq, [ctxq + LLSModel.coeff + orderq*8]

    ; Elements 0 and 1.
    movsd   m0, [varq]
    movhpd  m0, [varq + 8]
    mulpd   m0, [coefsq]                    ; memory operand: coefs must be 16-byte aligned

    ; Point both arrays at element `order` and index them with a negative
    ; counter that runs up towards zero, starting at element 2.
    lea     coefsq, [coefsq + iq*8]
    lea     varq, [varq + iq*8]
    neg     iq
    add     iq, 2                           ; i = 2 - order
.loop:
    movsd   m1, [varq + iq*8]
    movhpd  m1, [varq + iq*8 + 8]
    mulpd   m1, [coefsq + iq*8]
    addpd   m0, m1
    add     iq, 2
    jl .loop                                ; i < 0: at least one more pair
    jg .skip1                               ; i > 0 (same flags): nothing left over

    ; i == 0: exactly one element remains. movsd from memory zeroes the high
    ; half of m1, so the packed add leaves the high accumulator lane unchanged.
    movsd   m1, [varq + iq*8]
    mulsd   m1, [coefsq + iq*8]
    addpd   m0, m1
.skip1:
    ; Horizontal add of the two accumulator lanes into the low double.
    movhlps m1, m0
    addsd   m0, m1
%if ARCH_X86_32
    ; Hand the result over on the x87 stack via memory.
    movsd  r0m, m0
    fld   qword r0m
%endif
    RET
...@@ -25,12 +25,15 @@ ...@@ -25,12 +25,15 @@
void ff_update_lls_sse2(LLSModel *m, double *var); void ff_update_lls_sse2(LLSModel *m, double *var);
void ff_update_lls_avx(LLSModel *m, double *var); void ff_update_lls_avx(LLSModel *m, double *var);
double ff_evaluate_lls_sse2(LLSModel *m, double *var, int order);
av_cold void ff_init_lls_x86(LLSModel *m) av_cold void ff_init_lls_x86(LLSModel *m)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
m->update_lls = ff_update_lls_sse2; m->update_lls = ff_update_lls_sse2;
if (m->indep_count >= 4)
m->evaluate_lls = ff_evaluate_lls_sse2;
} }
if (EXTERNAL_AVX(cpu_flags) && 0) { if (EXTERNAL_AVX(cpu_flags) && 0) {
m->update_lls = ff_update_lls_avx; m->update_lls = ff_update_lls_avx;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment