Commit 8db00081 authored by Ronald S. Bultje, committed by Martin Storsjö

x86: hpeldsp: Move half-pel assembly from dsputil to hpeldsp

Signed-off-by: Martin Storsjö <martin@martin.st>
parent 28bc406c
......@@ -53,4 +53,7 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
hpel_funcs(avg, [2], 4);
hpel_funcs(avg, [3], 2);
hpel_funcs(avg_no_rnd,, 16);
if (ARCH_X86)
ff_hpeldsp_init_x86(c, flags);
}
......@@ -94,4 +94,6 @@ typedef struct HpelDSPContext {
void ff_hpeldsp_init(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
#endif /* AVCODEC_HPELDSP_H */
......@@ -10,6 +10,7 @@ OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o
OBJS-$(CONFIG_H264DSP) += x86/h264dsp_init.o
OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o
OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodec.o
......@@ -44,7 +45,7 @@ YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \
x86/hpeldsp.o \
x86/fpel.o \
x86/mpeg4qpel.o \
x86/qpel.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
......@@ -63,7 +64,10 @@ YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
x86/h264_intrapred_10bit.o
YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
x86/h264_qpel_10bit.o \
x86/fpel.o \
x86/qpel.o
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
x86/hpeldsp.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
......
This diff is collapsed.
This diff is collapsed.
;******************************************************************************
;* MMX optimized DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION .text
; MMXEXT section. INIT_MMX, cglobal, movh, mova, movsxdifnidn and RET are
; x86inc/x86util abstractions pulled in by the include at the top of the file;
; their exact expansion is not visible here.
INIT_MMX mmxext
; void pixels(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; PIXELS48 op, width
;   Defines one function per instantiation, named op_pixelsWIDTH
;   (e.g. put_pixels4, avg_pixels8) as passed to cglobal.
;   op    = put : copy each source row to block
;           avg : pavgb (rounded-up byte average) of the source row with the
;                 row already in block, result stored to block
;   width = 4 or 8 in the instantiations below; 4 selects movh, anything else
;           selects mova as the per-row load/store.
;
; Register roles (x86inc argument registers):
;   r0  = block (destination)     r1 = pixels (source)
;   r2  = line_size (byte stride, applied to both pointers)
;   r3d = rows remaining          r4 = 3 * line_size
;
; Four rows are processed per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
;
; NOTE(review): with MMX registers the memory operand of pavgb is 64 bits wide,
; so the 4-wide avg variant appears to read 8 bytes per destination row while
; storing only 4 -- confirm callers tolerate this over-read.
; NOTE(review): RET is used here while the SSE2 functions in this file use
; REP_RET after the same loop shape -- confirm whether that is intentional.
%macro PIXELS48 2
%if %2 == 4
%define OP movh
%else
%define OP mova
%endif
cglobal %1_pixels%2, 4,5
; Sign-extend the low 32 bits of line_size into the full register.
; NOTE(review): redundant if line_size really is ptrdiff_t as documented above.
movsxdifnidn r2, r2d
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
OP m0, [r1]
OP m1, [r1+r2]
OP m2, [r1+r2*2]
OP m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
%ifidn %1, avg
; avg only: blend each loaded row with the matching destination row
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
%endif
; store the four result rows
OP [r0], m0
OP [r0+r2], m1
OP [r0+r2*2], m2
OP [r0+r4], m3
; four rows done; this sub sets the flags tested by jne below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jne .loop
RET
%endmacro
; Instantiate put and avg variants for 4-byte and 8-byte wide blocks.
PIXELS48 put, 4
PIXELS48 avg, 4
PIXELS48 put, 8
PIXELS48 avg, 8
; SSE2 section: from here on m0-m3 are expected to be XMM registers
; (INIT_XMM is an x86inc macro defined outside this file).
INIT_XMM sse2
; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Copies rows from pixels to block, one full vector register per row, using the
; same stride (line_size) for source and destination.
;
; Register roles (x86inc argument registers):
;   r0  = block (destination)     r1 = pixels (source)
;   r2  = line_size               r3d = rows remaining
;   r4  = 3 * line_size
; cglobal 4,5,4: 4 arguments, 5 general-purpose registers, 4 vector registers.
;
; Four rows are copied per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
; Loads use movu and stores use mova; presumably block rows must therefore be
; 16-byte aligned while pixels may be unaligned -- verify against x86inc.
; NOTE(review): unlike PIXELS48 in this file, line_size is used without
; movsxdifnidn, which is only correct if it arrives as a full-width ptrdiff_t.
cglobal put_pixels16, 4,5,4
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
; store the four rows to the destination
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
; four rows done; this sub sets the flags tested by jnz below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jnz .loop
; REP_RET is the x86inc return macro used directly after a conditional branch
REP_RET
; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Averages rows from pixels into block: each destination row becomes the
; pavgb (rounded-up byte average) of the source row and the row already in
; block. One full vector register is processed per row; source and destination
; share the same stride (line_size).
;
; Register roles (x86inc argument registers):
;   r0  = block (destination, read and written)
;   r1  = pixels (source)         r2  = line_size
;   r3d = rows remaining          r4  = 3 * line_size
; cglobal 4,5,4: 4 arguments, 5 general-purpose registers, 4 vector registers.
;
; Four rows are handled per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
; Source rows are loaded with movu; destination rows are accessed through
; pavgb memory operands and mova stores, so block rows presumably must be
; 16-byte aligned -- verify against x86inc and the callers.
cglobal avg_pixels16, 4,5,4
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
; blend each source row with the matching destination row
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
; write the averaged rows back to the destination
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
; four rows done; this sub sets the flags tested by jnz below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jnz .loop
; REP_RET is the x86inc return macro used directly after a conditional branch
REP_RET
This diff is collapsed.
This diff is collapsed.
......@@ -174,83 +174,3 @@ cglobal %1_pixels16_l2, 6,6
INIT_MMX mmxext
PIXELS16_L2 put
PIXELS16_L2 avg
; MMXEXT section. INIT_MMX, cglobal, movh, mova, movsxdifnidn and RET are
; x86inc/x86util abstractions; their exact expansion is not visible here.
INIT_MMX mmxext
; void pixels(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; PIXELS48 op, width
;   Defines one function per instantiation, named op_pixelsWIDTH
;   (e.g. put_pixels4, avg_pixels8) as passed to cglobal.
;   op    = put : copy each source row to block
;           avg : pavgb (rounded-up byte average) of the source row with the
;                 row already in block, result stored to block
;   width = 4 or 8 in the instantiations below; 4 selects movh, anything else
;           selects mova as the per-row load/store.
;
; Register roles (x86inc argument registers):
;   r0  = block (destination)     r1 = pixels (source)
;   r2  = line_size (byte stride, applied to both pointers)
;   r3d = rows remaining          r4 = 3 * line_size
;
; Four rows are processed per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
;
; NOTE(review): with MMX registers the memory operand of pavgb is 64 bits wide,
; so the 4-wide avg variant appears to read 8 bytes per destination row while
; storing only 4 -- confirm callers tolerate this over-read.
%macro PIXELS48 2
%if %2 == 4
%define OP movh
%else
%define OP mova
%endif
cglobal %1_pixels%2, 4,5
; Sign-extend the low 32 bits of line_size into the full register.
; NOTE(review): redundant if line_size really is ptrdiff_t as documented above.
movsxdifnidn r2, r2d
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
OP m0, [r1]
OP m1, [r1+r2]
OP m2, [r1+r2*2]
OP m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
%ifidn %1, avg
; avg only: blend each loaded row with the matching destination row
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
%endif
; store the four result rows
OP [r0], m0
OP [r0+r2], m1
OP [r0+r2*2], m2
OP [r0+r4], m3
; four rows done; this sub sets the flags tested by jne below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jne .loop
RET
%endmacro
; Instantiate put and avg variants for 4-byte and 8-byte wide blocks.
PIXELS48 put, 4
PIXELS48 avg, 4
PIXELS48 put, 8
PIXELS48 avg, 8
; SSE2 section: from here on m0-m3 are expected to be XMM registers
; (INIT_XMM is an x86inc macro defined outside this file).
INIT_XMM sse2
; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Copies rows from pixels to block, one full vector register per row, using the
; same stride (line_size) for source and destination.
;
; Register roles (x86inc argument registers):
;   r0  = block (destination)     r1 = pixels (source)
;   r2  = line_size               r3d = rows remaining
;   r4  = 3 * line_size
; cglobal 4,5,4: 4 arguments, 5 general-purpose registers, 4 vector registers.
;
; Four rows are copied per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
; Loads use movu and stores use mova; presumably block rows must therefore be
; 16-byte aligned while pixels may be unaligned -- verify against x86inc.
cglobal put_pixels16, 4,5,4
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
; store the four rows to the destination
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
; four rows done; this sub sets the flags tested by jnz below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jnz .loop
; REP_RET is the x86inc return macro used directly after a conditional branch
REP_RET
; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
;
; Averages rows from pixels into block: each destination row becomes the
; pavgb (rounded-up byte average) of the source row and the row already in
; block. One full vector register is processed per row; source and destination
; share the same stride (line_size).
;
; Register roles (x86inc argument registers):
;   r0  = block (destination, read and written)
;   r1  = pixels (source)         r2  = line_size
;   r3d = rows remaining          r4  = 3 * line_size
; cglobal 4,5,4: 4 arguments, 5 general-purpose registers, 4 vector registers.
;
; Four rows are handled per iteration and the loop exits only when the row
; counter reaches exactly zero, so h is expected to be a positive multiple of 4.
; Source rows are loaded with movu; destination rows are accessed through
; pavgb memory operands and mova stores, so block rows presumably must be
; 16-byte aligned -- verify against x86inc and the callers.
cglobal avg_pixels16, 4,5,4
; r4 = 3 * line_size, offset of the fourth row in each group
lea r4, [r2*3]
.loop:
; load four consecutive source rows
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
; advance the source pointer by four rows
lea r1, [r1+r2*4]
; blend each source row with the matching destination row
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
; write the averaged rows back to the destination
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
; four rows done; this sub sets the flags tested by jnz below
sub r3d, 4
; advance the destination pointer; lea leaves the flags from sub untouched
lea r0, [r0+r2*4]
jnz .loop
; REP_RET is the x86inc return macro used directly after a conditional branch
REP_RET
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment