Commit e3f530fe authored by Ronald S. Bultje's avatar Ronald S. Bultje

prores: idct sse2/sse4 optimizations.

~3.0-3.5x as fast as original C version, 1.6x as fast overall.
parent 6aa3cac6
......@@ -545,8 +545,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
if (ctx->qmat_changed || sf != ctx->prev_slice_sf) {
ctx->prev_slice_sf = sf;
for (i = 0; i < 64; i++) {
ctx->qmat_luma_scaled[i] = ctx->qmat_luma[i] * sf;
ctx->qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * sf;
ctx->qmat_luma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_luma[i] * sf;
ctx->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf;
}
}
......
......@@ -56,6 +56,8 @@ void ff_proresdsp_init(ProresDSPContext *dsp)
dsp->idct_put = prores_idct_put_c;
dsp->idct_permutation_type = FF_NO_IDCT_PERM;
if (HAVE_MMX) ff_proresdsp_x86_init(dsp);
ff_init_scantable_permutation(dsp->idct_permutation,
dsp->idct_permutation_type);
}
......@@ -35,4 +35,6 @@ typedef struct {
void ff_proresdsp_init(ProresDSPContext *dsp);
void ff_proresdsp_x86_init(ProresDSPContext *dsp);
#endif /* AVCODEC_PRORESDSP_H */
......@@ -33,6 +33,8 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o
MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
......
......@@ -64,6 +64,8 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x00400
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = {0x0200020002000200ULL, 0x0200020002000200ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019)= {0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
......
/*
* Apple ProRes compatible decoder
*
* Copyright (c) 2010-2011 Maxim Poliakovski
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/proresdsp.h"
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
DCTELEM *block);
void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
DCTELEM *block);
void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
DCTELEM *block);
void ff_proresdsp_x86_init(ProresDSPContext *dsp)
{
#if ARCH_X86_64
int flags = av_get_cpu_flags();
if (flags & AV_CPU_FLAG_SSE2) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse2;
}
if (flags & AV_CPU_FLAG_SSE4) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse4;
}
#if HAVE_AVX
if (flags & AV_CPU_FLAG_AVX) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_avx;
}
#endif
#endif
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment