Commit f743fa9c authored by Christophe Gisquet, committed by Michael Niedermayer

x86: huffyuvdsp: add_hfyu_left_pred_bgr32

          C   MMX   SSE2
Cycles: 3092  1053  578
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 7be79c76
;******************************************************************************
;* SIMD-optimized HuffYUV functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Christophe Gisquet
;*
;* This file is part of FFmpeg.
;*
......@@ -222,3 +223,41 @@ INIT_MMX mmx
ADD_BYTES
INIT_XMM sse2
ADD_BYTES
;------------------------------------------------------------------------------
; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
; intptr_t w, uint8_t *left)
;
; Left prediction for packed 4-byte pixels: each output pixel is the byte-wise
; (paddb, i.e. wrapping per byte) sum of the corresponding src pixel and the
; previous output pixel. The first pixel is added to the 4 bytes at *left;
; on return *left holds the last output pixel (read back from dst[w*4 - 4]).
;
; dst  - output buffer
; src  - input buffer of residuals
; w    - width in pixels (scaled by 4 to a byte count on entry)
; left - in: pixel preceding the row; out: last pixel written
;
; The macro is instantiated once per vector size (mmsize == 8 and
; mmsize == 16 paths are selected with %if).
;
; NOTE(review): the loop always loads and stores whole mmsize-byte vectors with
; unaligned moves and advances by mmsize, so when w*4 is not a multiple of
; mmsize it touches up to mmsize-4 bytes past the end of src and dst.
; Presumably callers provide that padding -- confirm.
; NOTE(review): the loop body runs before its exit test, so w == 0 still
; processes one vector and then reads dst[-4] -- confirm callers never pass 0.
; NOTE(review): no emms is issued in the mmsize == 8 variant; presumably the
; caller is responsible for that -- confirm.
;------------------------------------------------------------------------------
%macro LEFT_BGR32 0
cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
; w = byte count (4 bytes per pixel)
shl wq, 2
; m0 low dword = *left
movd m0, [leftq]
; Point dst/src at the end of the row; wq becomes a negative offset that
; counts up towards zero.
lea dstq, [dstq + wq]
lea srcq, [srcq + wq]
; Move the left pixel into the top dword of m0, where the loop expects the
; "previous pixel" to live. (LSHIFT is defined outside this view; presumably a
; whole-register left shift by a byte count -- confirm.)
LSHIFT m0, mmsize-4
neg wq
.loop:
; m1 = m2 = next mmsize bytes of residuals
movu m1, [srcq+wq]
mova m2, m1
%if mmsize == 8
; Broadcast the previous pixel (high dword of m0) to both dwords.
punpckhdq m0, m0
%endif
; m1[i] = src[i] + src[i-1]: running sum over adjacent pixel pairs.
LSHIFT m1, 4
paddb m1, m2
%if mmsize == 16
; Broadcast the previous pixel to all four dwords (q3333 is defined outside
; this view; presumably selects dword 3 for every lane -- confirm).
pshufd m0, m0, q3333
; Second step of the in-register prefix sum: add the value two pixels back so
; each pixel holds the sum of all residuals up to it within this vector.
mova m2, m1
LSHIFT m1, 8
paddb m1, m2
%endif
; Add the carried-in previous pixel to every prefix sum; m0 now holds the
; output pixels, and its top dword is the carry for the next iteration.
paddb m0, m1
movu [dstq+wq], m0
add wq, mmsize
; Continue while the offset is still negative.
jl .loop
; Write the last output pixel of the row back to *left.
movd m0, [dstq-4]
movd [leftq], m0
REP_RET
%endmacro
; Emit the 8-byte-vector (MMX) variant.
INIT_MMX mmx
LEFT_BGR32
; Emit the 16-byte-vector (SSE2) variant.
INIT_XMM sse2
LEFT_BGR32
......@@ -41,6 +41,11 @@ int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
intptr_t w, int left);
/* Left prediction over 4-byte pixels: MMX and SSE2 variants with identical
 * signatures. ff_huffyuvdsp_init_x86() assigns one of them to
 * c->add_hfyu_left_pred_bgr32 according to the detected CPU flags.
 * NOTE(review): presumably these are the symbols produced by the LEFT_BGR32
 * assembly macro instantiations -- confirm the name prefix added by cglobal. */
void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
intptr_t w, uint8_t *left);
void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
intptr_t w, uint8_t *left);
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
......@@ -50,8 +55,10 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif
if (EXTERNAL_MMX(cpu_flags))
if (EXTERNAL_MMX(cpu_flags)) {
c->add_bytes = ff_add_bytes_mmx;
c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
/* slower than cmov version on AMD */
......@@ -62,6 +69,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2;
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment