Commit 709bb45c authored by Shivraj Patil, committed by Michael Niedermayer

avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for me_cmp functions in new file me_cmp_msa.c
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 2f3f98af
......@@ -991,4 +991,6 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
ff_me_cmp_init_ppc(c, avctx);
if (ARCH_X86)
ff_me_cmp_init_x86(c, avctx);
if (ARCH_MIPS)
ff_me_cmp_init_mips(c, avctx);
}
......@@ -87,6 +87,7 @@ void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx);
void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
......
......@@ -31,6 +31,7 @@ OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_init_mips.o
OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_init_mips.o
OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o
OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
mips/hevc_mc_uni_msa.o \
mips/hevc_mc_uniw_msa.o \
......@@ -51,5 +52,6 @@ MSA-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_msa.o
MSA-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_msa.o
MSA-OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_msa.o
MSA-OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_msa.o
MSA-OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_msa.o
LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
/*
* Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "me_cmp_mips.h"
#if HAVE_MSA
/**
 * Install the MSA implementations of the me_cmp functions into @p c.
 *
 * All assignments are compiled in only when BIT_DEPTH == 8; for any other
 * bit depth this function leaves @p c untouched.
 *
 * @param c     compare-function context whose pix_abs, sad, sse and
 *              hadamard8_diff slots are overwritten
 * @param avctx not used by this function
 */
static av_cold void me_cmp_msa(MECmpContext *c, AVCodecContext *avctx)
{
#if BIT_DEPTH == 8
    /* The plain pix_abs kernels also back the sad[] slots. */
    c->sad[0] = ff_pix_abs16_msa;
    c->sad[1] = ff_pix_abs8_msa;

    /* pix_abs[0][*]: the 16-pixel kernels (plain, x2, y2, xy2). */
    c->pix_abs[0][0] = ff_pix_abs16_msa;
    c->pix_abs[0][1] = ff_pix_abs16_x2_msa;
    c->pix_abs[0][2] = ff_pix_abs16_y2_msa;
    c->pix_abs[0][3] = ff_pix_abs16_xy2_msa;

    /* pix_abs[1][*]: the 8-pixel kernels (plain, x2, y2, xy2). */
    c->pix_abs[1][0] = ff_pix_abs8_msa;
    c->pix_abs[1][1] = ff_pix_abs8_x2_msa;
    c->pix_abs[1][2] = ff_pix_abs8_y2_msa;
    c->pix_abs[1][3] = ff_pix_abs8_xy2_msa;

    /* sse[0..2]: the 16-, 8- and 4-pixel SSE kernels. */
    c->sse[0] = ff_sse16_msa;
    c->sse[1] = ff_sse8_msa;
    c->sse[2] = ff_sse4_msa;

    /* Hadamard: slots 0/1 take the diff kernels, 4/5 the intra kernels. */
    c->hadamard8_diff[0] = ff_hadamard8_diff16_msa;
    c->hadamard8_diff[1] = ff_hadamard8_diff8x8_msa;
    c->hadamard8_diff[4] = ff_hadamard8_intra16_msa;
    c->hadamard8_diff[5] = ff_hadamard8_intra8x8_msa;
#endif
}
#endif // #if HAVE_MSA
/**
 * MIPS-specific part of MECmpContext initialisation.
 *
 * When the build has HAVE_MSA set, forwards both arguments to me_cmp_msa();
 * otherwise the function body is empty and @p c is left unchanged.
 *
 * @param c     compare-function context to populate
 * @param avctx codec context, passed through unchanged
 */
av_cold void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx)
{
#if HAVE_MSA
    me_cmp_msa(c, avctx);
#endif /* HAVE_MSA */
}
/*
* Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MIPS_ME_CMP_MIPS_H
#define AVCODEC_MIPS_ME_CMP_MIPS_H
#include "../mpegvideo.h"
#include "libavcodec/bit_depth_template.c"
/*
 * Hadamard comparison kernels. me_cmp_msa() installs them into
 * MECmpContext.hadamard8_diff[]: diff16 -> [0], diff8x8 -> [1],
 * intra16 -> [4], intra8x8 -> [5].
 */
int ff_hadamard8_diff8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h);
int ff_hadamard8_intra8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h);
int ff_hadamard8_diff16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h);
int ff_hadamard8_intra16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h);
/*
 * 16-pixel pix_abs kernels. me_cmp_msa() installs them into
 * MECmpContext.pix_abs[0][0..3] in the order plain, x2, y2, xy2; the plain
 * kernel is additionally used for MECmpContext.sad[0].
 * NOTE(review): x2/y2/xy2 presumably denote half-pel interpolated
 * references -- confirm against the implementation.
 */
int ff_pix_abs16_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs16_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs16_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs16_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
/*
 * 8-pixel pix_abs kernels. me_cmp_msa() installs them into
 * MECmpContext.pix_abs[1][0..3] in the order plain, x2, y2, xy2; the plain
 * kernel is additionally used for MECmpContext.sad[1].
 */
int ff_pix_abs8_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs8_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs8_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs8_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
ptrdiff_t stride, int h);
/*
 * SSE kernels. me_cmp_msa() installs them into MECmpContext.sse[0..2]
 * (sse16 -> [0], sse8 -> [1], sse4 -> [2]).
 */
int ff_sse16_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
ptrdiff_t stride, int i32Height);
int ff_sse8_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
ptrdiff_t stride, int i32Height);
int ff_sse4_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
ptrdiff_t stride, int i32Height);
/*
 * NOTE(review): not installed by me_cmp_msa(); presumably implemented and
 * used elsewhere -- confirm that this header is the intended home for it.
 */
void ff_add_pixels8_msa(uint8_t *av_restrict pixels, int16_t *block,
ptrdiff_t stride);
#endif // #ifndef AVCODEC_MIPS_ME_CMP_MIPS_H
This diff is collapsed.
......@@ -1295,6 +1295,29 @@
#define HSUB_UB4_UH(...) HSUB_UB4(v8u16, __VA_ARGS__)
#define HSUB_UB4_SH(...) HSUB_UB4(v8i16, __VA_ARGS__)
/* Description : SAD (Sum of Absolute Difference)
   Arguments   : Inputs  - in0, in1, ref0, ref1  (unsigned byte src & ref)
                 Outputs - sad_m                 (halfword vector with sad)
                 Return Type - unsigned halfword
   Details     : Absolute difference of all the byte elements from 'in0' with
                 'ref0' is calculated and preserved in 'diff0'; likewise
                 'in1' with 'ref1' in 'diff1'. From the 16 unsigned absolute
                 diff values of each vector, even-odd pairs are added
                 together to generate 8 halfword results, and the results of
                 both pairs are accumulated into 'sad_m'.
                 The value of the macro is a vector of 8 halfword partial
                 sums, not a scalar.
                 Every argument is parenthesized before it is cast, so
                 expression arguments are cast as a whole, and each argument
                 is evaluated exactly once.
*/
#define SAD_UB2_UH(in0, in1, ref0, ref1)                            \
( {                                                                 \
    v16u8 diff0_m, diff1_m;                                         \
    v8u16 sad_m = { 0 };                                            \
                                                                    \
    diff0_m = __msa_asub_u_b((v16u8) (in0), (v16u8) (ref0));        \
    diff1_m = __msa_asub_u_b((v16u8) (in1), (v16u8) (ref1));        \
                                                                    \
    sad_m += __msa_hadd_u_h((v16u8) diff0_m, (v16u8) diff0_m);      \
    sad_m += __msa_hadd_u_h((v16u8) diff1_m, (v16u8) diff1_m);      \
                                                                    \
    sad_m;                                                          \
} )
/* Description : Insert specified word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
......@@ -2429,6 +2452,42 @@
}
#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
#define TRANSPOSE8x8_UB_UH(...) TRANSPOSE8x8_UB(v8u16, __VA_ARGS__)
/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15
Outputs - out0, out1, out2, out3
Return Type - unsigned byte
Details : The 16 inputs are gathered four at a time with ILVEV_W2_SD
followed by __msa_ilvev_d: (in0, in4, in8, in12) into 'out1',
(in1, in5, in9, in13) into 'out3', (in2, in6, in10, in14) into
'tmp2_m' and (in3, in7, in11, in15) into 'tmp3_m'.
'out1' and 'out3' only hold intermediate data at that point.
The even bytes of those four vectors are then interleaved
(ILVEV_B2_SD) and an even/odd halfword interleave of the
result produces 'out0' and 'out2'; the odd bytes are
interleaved with __msa_ilvod_b and the same even/odd halfword
interleave produces the final 'out1' and 'out3'.
Because 'out1' and 'out3' are written before the remaining
inputs are read, they must not name the same variable as an
input that is read later in the sequence.
NOTE(review): presumably only the low 32-bit word (4 bytes) of
each input contributes to the result - confirm against the
definition of ILVEV_W2_SD.
*/
#define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
in8, in9, in10, in11, in12, in13, in14, in15, \
out0, out1, out2, out3) \
{ \
v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
\
ILVEV_W2_SD(in0, in4, in8, in12, tmp0_m, tmp1_m); \
out1 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m); \
\
ILVEV_W2_SD(in1, in5, in9, in13, tmp0_m, tmp1_m); \
out3 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m); \
\
ILVEV_W2_SD(in2, in6, in10, in14, tmp0_m, tmp1_m); \
\
tmp2_m = __msa_ilvev_d(tmp1_m, tmp0_m); \
ILVEV_W2_SD(in3, in7, in11, in15, tmp0_m, tmp1_m); \
\
tmp3_m = __msa_ilvev_d(tmp1_m, tmp0_m); \
ILVEV_B2_SD(out1, out3, tmp2_m, tmp3_m, tmp0_m, tmp1_m); \
out0 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
out2 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
\
tmp0_m = (v2i64) __msa_ilvod_b((v16i8) out3, (v16i8) out1); \
tmp1_m = (v2i64) __msa_ilvod_b((v16i8) tmp3_m, (v16i8) tmp2_m); \
out1 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
out3 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m); \
}
/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
in8, in9, in10, in11, in12, in13, in14, in15
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment