Commit 7f8417f2, authored by Kaustubh Raste and committed by Michael Niedermayer

avcodec/mips: Improve hevc uni-w copy mc msa functions

Load the specific destination bytes instead of MSA load and pack.
Pack the data to half word before clipping.
Use immediate unsigned saturation for clip to max saving one vector register.
Signed-off-by: Kaustubh Raste <kaustubh.raste@imgtec.com>
Reviewed-by: Manojkumar Bhosale <Manojkumar.Bhosale@imgtec.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent d8ef5a47
This diff is collapsed.
......@@ -204,6 +204,12 @@
out3 = LW((psrc) + 3 * stride); \
}
/* Description : Load two words with stride
   Arguments   : Inputs  - psrc (source pointer to load from)
                         - stride
                 Outputs - out0, out1
   Details     : Loads a word into 'out0' from (psrc) via LW
                 Loads a word into 'out1' from (psrc + stride) via LW
                 'stride' is parenthesized so that an expression argument
                 (e.g. 'cond ? a : b') is applied as one pointer offset
*/
#define LW2(psrc, stride, out0, out1)  \
{                                      \
    out0 = LW((psrc));                 \
    out1 = LW((psrc) + (stride));      \
}
/* Description : Load double words with stride
Arguments : Inputs - psrc (source pointer to load from)
- stride
......@@ -1047,6 +1053,25 @@
CLIP_SH2_0_255(in2, in3); \
}
/* Description : Clips all signed halfword elements of input vector
                 between 0 & 255
   Arguments   : Input  - in (input vector)
                 Output - out_m (value of the statement expression)
                 Return Type - signed halfword
   Details     : Negative elements are raised to 0 by a signed max against
                 immediate 0; the result is then reinterpreted as unsigned
                 halfwords and saturated with __msa_sat_u_h(..., 7).
                 NOTE(review): immediate 7 is expected to select an 8-bit
                 unsigned range (max 255) - confirm against the MSA spec.
                 'in' is parenthesized so the cast covers a whole
                 expression argument rather than only its first operand.
*/
#define CLIP_SH_0_255_MAX_SATU(in)                      \
( {                                                     \
    v8i16 out_m;                                        \
                                                        \
    out_m = __msa_maxi_s_h((v8i16) (in), 0);            \
    out_m = (v8i16) __msa_sat_u_h((v8u16) out_m, 7);    \
    out_m;                                              \
} )
/* Description : Two-vector form of CLIP_SH_0_255_MAX_SATU
   Arguments   : Inputs  - in0, in1 (input vectors)
                 Outputs - in0, in1 (overwritten in place)
   Details     : Each argument is passed through CLIP_SH_0_255_MAX_SATU
                 and the result is assigned back to the same argument,
                 'in0' first and then 'in1'
*/
#define CLIP_SH2_0_255_MAX_SATU(in0, in1)  \
{                                          \
    in0 = CLIP_SH_0_255_MAX_SATU(in0);     \
    in1 = CLIP_SH_0_255_MAX_SATU(in1);     \
}
/* Description : Four-vector form of CLIP_SH_0_255_MAX_SATU
   Arguments   : Inputs  - in0, in1, in2, in3 (input vectors)
                 Outputs - in0, in1, in2, in3 (overwritten in place)
   Details     : Each argument is passed through CLIP_SH_0_255_MAX_SATU
                 and the result is assigned back to the same argument,
                 in the order in0, in1, in2, in3
*/
#define CLIP_SH4_0_255_MAX_SATU(in0, in1, in2, in3)  \
{                                                    \
    in0 = CLIP_SH_0_255_MAX_SATU(in0);               \
    in1 = CLIP_SH_0_255_MAX_SATU(in1);               \
    in2 = CLIP_SH_0_255_MAX_SATU(in2);               \
    in3 = CLIP_SH_0_255_MAX_SATU(in3);               \
}
/* Description : Clips all signed word elements of input vector
between 0 & 255
Arguments : Inputs - in (input vector)
......@@ -1965,6 +1990,11 @@
result is in place written to 'in0'
Similar for other pairs
*/
/* Two-operand variant: shifts 'in0' and 'in1' left by 'shift' and writes
   each result back in place. All arguments are parenthesized so that a
   compound 'shift' expression (e.g. 'n & 3') is used as the whole shift
   count instead of being split by operator precedence. */
#define SLLI_2V(in0, in1, shift)  \
{                                 \
    (in0) = (in0) << (shift);     \
    (in1) = (in1) << (shift);     \
}
#define SLLI_4V(in0, in1, in2, in3, shift) \
{ \
in0 = in0 << shift; \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment