Commit d1a32c3f authored by Christophe Gisquet, committed by Michael Niedermayer

x86: kill fpel_mmx.c

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 19e66c72
......@@ -50,13 +50,11 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/fpel_mmx.o \
x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \
x86/simple_idct.o
MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
......
......@@ -461,7 +461,7 @@ static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *
#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
#if HAVE_MMX_INLINE
#if HAVE_MMX_EXTERNAL
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
......@@ -485,19 +485,23 @@ static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src,
{
ff_avg_pixels16_mmx(dst, src, stride, 16);
}
#endif
static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
AVCodecContext *avctx)
{
#if HAVE_MMX_EXTERNAL
c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;
#endif
#if HAVE_MMX_INLINE
c->cavs_idct8_add = cavs_idct8_add_mmx;
c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
}
#endif /* HAVE_MMX_INLINE */
}
#define DSPFUNC(PFX, IDX, NUM, EXT) \
c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
......@@ -545,12 +549,9 @@ static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
{
#if HAVE_MMX_INLINE
int cpu_flags = av_get_cpu_flags();
if (INLINE_MMX(cpu_flags))
cavsdsp_init_mmx(c, avctx);
#endif /* HAVE_MMX_INLINE */
cavsdsp_init_mmx(c, avctx);
#if HAVE_AMD3DNOW_INLINE
if (INLINE_AMD3DNOW(cpu_flags))
cavsdsp_init_3dnow(c, avctx);
......
......@@ -73,8 +73,8 @@ void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
int order);
......@@ -112,8 +112,11 @@ void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
#if HAVE_YASM
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
......
......@@ -554,13 +554,12 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[
}\
}
#if HAVE_MMX_INLINE
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
#if HAVE_YASM
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
#endif
#if HAVE_YASM
DIRAC_PIXOP(avg, ff_avg, mmxext)
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
......
......@@ -25,85 +25,83 @@
SECTION .text
INIT_MMX mmxext
%macro PAVGB_MMX 4
LOAD %3, %1
por %3, %2
pxor %2, %1
pand %2, %4
psrlq %2, 1
psubb %3, %2
SWAP %2, %3
%endmacro
; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
; ptrdiff_t line_size, int h)
%macro PIXELS48 2
%if %2 == 4
%define OP movh
%macro OP_PIXELS 2
%if %2 == mmsize/2
%define LOAD movh
%define SAVE movh
%define LEN mmsize
%else
%define OP mova
%define LOAD movu
%define SAVE mova
%define LEN %2
%endif
cglobal %1_pixels%2, 4,5
cglobal %1_pixels%2, 4,5,4
movsxdifnidn r2, r2d
lea r4, [r2*3]
%ifidn %1, avg
%if notcpuflag(mmxext)
pcmpeqd m6, m6
paddb m6, m6
%endif
%endif
.loop:
OP m0, [r1]
OP m1, [r1+r2]
OP m2, [r1+r2*2]
OP m3, [r1+r4]
lea r1, [r1+r2*4]
%assign %%i 0
%rep LEN/mmsize
LOAD m0, [r1 + %%i]
LOAD m1, [r1+r2 + %%i]
LOAD m2, [r1+r2*2 + %%i]
LOAD m3, [r1+r4 + %%i]
%ifidn %1, avg
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
%if notcpuflag(mmxext)
PAVGB_MMX [r0 + %%i], m0, m4, m6
PAVGB_MMX [r0+r2 + %%i], m1, m5, m6
PAVGB_MMX [r0+r2*2 + %%i], m2, m4, m6
PAVGB_MMX [r0+r4 + %%i], m3, m5, m6
%else
pavgb m0, [r0 + %%i]
pavgb m1, [r0+r2 + %%i]
pavgb m2, [r0+r2*2 + %%i]
pavgb m3, [r0+r4 + %%i]
%endif
%endif
OP [r0], m0
OP [r0+r2], m1
OP [r0+r2*2], m2
OP [r0+r4], m3
SAVE [r0 + %%i], m0
SAVE [r0+r2 + %%i], m1
SAVE [r0+r2*2 + %%i], m2
SAVE [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
sub r3d, 4
lea r1, [r1+r2*4]
lea r0, [r0+r2*4]
jne .loop
RET
%endmacro
PIXELS48 put, 4
PIXELS48 avg, 4
PIXELS48 put, 8
PIXELS48 avg, 8
INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS avg, 4
OP_PIXELS put, 8
OP_PIXELS avg, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16
INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16
INIT_XMM sse2
; void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
; ptrdiff_t line_size, int h)
cglobal put_pixels16, 4,5,4
lea r4, [r2*3]
.loop:
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
lea r1, [r1+r2*4]
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
sub r3d, 4
lea r0, [r0+r2*4]
jnz .loop
REP_RET
; void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
; ptrdiff_t line_size, int h)
cglobal avg_pixels16, 4,5,4
lea r4, [r2*3]
.loop:
movu m0, [r1]
movu m1, [r1+r2]
movu m2, [r1+r2*2]
movu m3, [r1+r4]
lea r1, [r1+r2*4]
pavgb m0, [r0]
pavgb m1, [r0+r2]
pavgb m2, [r0+r2*2]
pavgb m3, [r0+r4]
mova [r0], m0
mova [r0+r2], m1
mova [r0+r2*2], m2
mova [r0+r4], m3
sub r3d, 4
lea r0, [r0+r2*4]
jnz .loop
REP_RET
OP_PIXELS put, 16
OP_PIXELS avg, 16
/*
* MMX-optimized avg/put pixel routines
*
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "fpel.h"
#include "inline_asm.h"
#if HAVE_MMX_INLINE
// in case more speed is needed - unrolling would certainly help
/*
 * Combine an 8-byte-wide column of rows from pixels into block, in place.
 *
 * For each row, 8 bytes of block and 8 bytes of pixels are loaded, passed
 * through PAVGB_MMX, and the 8-byte result is stored back to block.
 * NOTE(review): PAVGB_MMX and MOVQ_BFE are defined outside this file
 * (presumably a byte-wise average using a 0xFE mask held in mm6) - confirm.
 *
 * block     - destination rows; read and written
 * pixels    - source rows; only read
 * line_size - byte distance between consecutive rows, applied to both pointers
 * h         - number of rows; the do/while runs the body before testing,
 *             so h must be at least 1
 */
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
/* Set up mm6 once, outside the loop; it is the 4th PAVGB_MMX argument. */
MOVQ_BFE(mm6);
/* NOTE(review): presumably aligns the loop entry - macro not visible here. */
JUMPALIGN();
do {
__asm__ volatile(
/* mm0 = 8 bytes of destination, mm1 = 8 bytes of source */
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
/* result is produced in mm2 (3rd argument), which is stored below */
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
/* advance both pointers by one row */
pixels += line_size;
block += line_size;
}
while (--h);
}
/*
 * 16-byte-wide variant of the in-place combine of pixels into block.
 *
 * Each row is processed as two 8-byte halves: bytes 0-7 and then bytes 8-15.
 * Each half is loaded from block and pixels, passed through PAVGB_MMX, and
 * stored back to block.
 * NOTE(review): PAVGB_MMX and MOVQ_BFE are defined outside this file
 * (presumably a byte-wise average using a 0xFE mask held in mm6) - confirm.
 *
 * block     - destination rows; read and written
 * pixels    - source rows; only read
 * line_size - byte distance between consecutive rows, applied to both pointers
 * h         - number of rows; the do/while runs the body before testing,
 *             so h must be at least 1
 */
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
/* Set up mm6 once, outside the loop; it is the 4th PAVGB_MMX argument. */
MOVQ_BFE(mm6);
/* NOTE(review): presumably aligns the loop entry - macro not visible here. */
JUMPALIGN();
do {
__asm__ volatile(
/* first half: bytes 0-7 of the row */
"movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t"
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, %0 \n\t"
/* second half: "8%0"/"8%1" prefix the memory operand text with a
 * displacement of 8, addressing bytes 8-15 of the same row */
"movq 8%0, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
"movq %%mm2, 8%0 \n\t"
:"+m"(*block)
:"m"(*pixels)
:"memory");
/* advance both pointers by one row */
pixels += line_size;
block += line_size;
}
while (--h);
}
/*
 * Copy an 8-byte-wide column of rows from pixels to block.
 *
 * The loop is unrolled to copy four rows per iteration and decrements h by 4
 * each time, exiting only when h reaches exactly zero. h must therefore be a
 * positive multiple of 4.
 *
 * block     - destination rows; written
 * pixels    - source rows; only read
 * line_size - byte distance between consecutive rows, applied to both pointers
 * h         - number of rows to copy (positive multiple of 4)
 */
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
__asm__ volatile (
/* REG_a = 2 * line_size: the stride for a pair of rows */
"lea (%3, %3), %%"REG_a" \n\t"
".p2align 3 \n\t"
"1: \n\t"
/* rows 0 and 1 of this iteration */
"movq (%1 ), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
/* step source and destination forward by two rows */
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
/* rows 2 and 3 of this iteration */
"movq (%1 ), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
/* four rows done; loop while rows remain */
"subl $4, %0 \n\t"
"jnz 1b \n\t"
/* h, pixels and block are all modified by the asm, hence "+" */
: "+g"(h), "+r"(pixels), "+r"(block)
: "r"((x86_reg)line_size)
: "%"REG_a, "memory"
);
}
/*
 * Copy a 16-byte-wide column of rows from pixels to block.
 *
 * Each row is moved as two 8-byte halves (offsets 0 and 8). The loop is
 * unrolled to copy four rows per iteration and decrements h by 4 each time,
 * exiting only when h reaches exactly zero. h must therefore be a positive
 * multiple of 4.
 *
 * block     - destination rows; written
 * pixels    - source rows; only read
 * line_size - byte distance between consecutive rows, applied to both pointers
 * h         - number of rows to copy (positive multiple of 4)
 */
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
__asm__ volatile (
/* REG_a = 2 * line_size: the stride for a pair of rows */
"lea (%3, %3), %%"REG_a" \n\t"
".p2align 3 \n\t"
"1: \n\t"
/* rows 0 and 1: mm0/mm4 hold the two halves of the first row,
 * mm1/mm5 the two halves of the second */
"movq (%1 ), %%mm0 \n\t"
"movq 8(%1 ), %%mm4 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq 8(%1, %3), %%mm5 \n\t"
"movq %%mm0, (%2) \n\t"
"movq %%mm4, 8(%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
/* step source and destination forward by two rows */
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
/* rows 2 and 3 of this iteration */
"movq (%1 ), %%mm0 \n\t"
"movq 8(%1 ), %%mm4 \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq 8(%1, %3), %%mm5 \n\t"
"movq %%mm0, (%2) \n\t"
"movq %%mm4, 8(%2) \n\t"
"movq %%mm1, (%2, %3) \n\t"
"movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t"
/* four rows done; loop while rows remain */
"subl $4, %0 \n\t"
"jnz 1b \n\t"
/* h, pixels and block are all modified by the asm, hence "+" */
: "+g"(h), "+r"(pixels), "+r"(block)
: "r"((x86_reg)line_size)
: "%"REG_a, "memory"
);
}
#endif /* HAVE_MMX_INLINE */
......@@ -29,8 +29,8 @@
#include "fpel.h"
#if HAVE_YASM
void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
......@@ -49,9 +49,12 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
#define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
#define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx
#define ff_put_pixels4_mmxext ff_put_pixels4_mmx
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\
......
......@@ -165,15 +165,17 @@ HPELDSP_AVG_PIXELS16(_mmxext)
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
do { \
if (HAVE_MMX_EXTERNAL) \
c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
if (HAVE_MMX_INLINE) { \
c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
} \
} while (0)
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
{
#if HAVE_MMX_INLINE
SET_HPEL_FUNCS(put, [0], 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
SET_HPEL_FUNCS(avg, [0], 16, mmx);
......@@ -181,7 +183,6 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
SET_HPEL_FUNCS(put, [1], 8, mmx);
SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
SET_HPEL_FUNCS(avg, [1], 8, mmx);
#endif /* HAVE_MMX_INLINE */
}
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
......
......@@ -728,6 +728,7 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
);
}
#if HAVE_MMX_EXTERNAL
static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd)
{
......@@ -748,6 +749,7 @@ static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
{
ff_avg_pixels16_mmx(dst, src, stride, 16);
}
#endif
#define FN_ASSIGN(OP, X, Y, INSN) \
dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
......@@ -755,7 +757,10 @@ static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
{
#if HAVE_MMX_EXTERNAL
FN_ASSIGN(put_, 0, 0, _mmx);
FN_ASSIGN(avg_, 0, 0, _mmx);
#endif
FN_ASSIGN(put_, 0, 1, _mmx);
FN_ASSIGN(put_, 0, 2, _mmx);
FN_ASSIGN(put_, 0, 3, _mmx);
......@@ -774,8 +779,6 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
FN_ASSIGN(put_, 3, 1, _mmx);
FN_ASSIGN(put_, 3, 2, _mmx);
FN_ASSIGN(put_, 3, 3, _mmx);
FN_ASSIGN(avg_, 0, 0, _mmx);
}
av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment