Commit 157d6f0d authored by James Cowgill, committed by Michael Niedermayer

mips: port optimizations to mips n64

This mainly consists of replacing all the pointer arithmetic 'addiu'
instructions with PTR_ADDIU which will handle the differences in pointer
sizes when compiled on 64-bit mips systems.

The header asmdefs.h contains the PTR_ macros which expand to the correct MIPS
instructions to manipulate registers containing pointers.
Signed-off-by: James Cowgill <james410@cowgill.org.uk>
Reviewed-by: Nedeljko Babic <Nedeljko.Babic@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent eae13eae
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "aacdec_mips.h" #include "aacdec_mips.h"
#include "libavcodec/aactab.h" #include "libavcodec/aactab.h"
#include "libavcodec/sinewin.h" #include "libavcodec/sinewin.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static av_always_inline void float_copy(float *dst, const float *src, int count) static av_always_inline void float_copy(float *dst, const float *src, int count)
...@@ -80,7 +81,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count) ...@@ -80,7 +81,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count)
"lw %[temp5], 20(%[src]) \n\t" "lw %[temp5], 20(%[src]) \n\t"
"lw %[temp6], 24(%[src]) \n\t" "lw %[temp6], 24(%[src]) \n\t"
"lw %[temp7], 28(%[src]) \n\t" "lw %[temp7], 28(%[src]) \n\t"
"addiu %[src], %[src], 32 \n\t" PTR_ADDIU "%[src], %[src], 32 \n\t"
"sw %[temp0], 0(%[dst]) \n\t" "sw %[temp0], 0(%[dst]) \n\t"
"sw %[temp1], 4(%[dst]) \n\t" "sw %[temp1], 4(%[dst]) \n\t"
"sw %[temp2], 8(%[dst]) \n\t" "sw %[temp2], 8(%[dst]) \n\t"
...@@ -90,7 +91,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count) ...@@ -90,7 +91,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count)
"sw %[temp6], 24(%[dst]) \n\t" "sw %[temp6], 24(%[dst]) \n\t"
"sw %[temp7], 28(%[dst]) \n\t" "sw %[temp7], 28(%[dst]) \n\t"
"bne %[src], %[loop_end], 1b \n\t" "bne %[src], %[loop_end], 1b \n\t"
"addiu %[dst], %[dst], 32 \n\t" PTR_ADDIU "%[dst], %[dst], 32 \n\t"
".set pop \n\t" ".set pop \n\t"
: [temp0]"=&r"(temp[0]), [temp1]"=&r"(temp[1]), : [temp0]"=&r"(temp[0]), [temp1]"=&r"(temp[1]),
...@@ -250,7 +251,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -250,7 +251,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 4(%[p_predTime]) \n\t" "sw $0, 4(%[p_predTime]) \n\t"
"sw $0, 8(%[p_predTime]) \n\t" "sw $0, 8(%[p_predTime]) \n\t"
"sw $0, 12(%[p_predTime]) \n\t" "sw $0, 12(%[p_predTime]) \n\t"
"addiu %[p_predTime], %[p_predTime], 16 \n\t" PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t"
: [p_predTime]"+r"(p_predTime) : [p_predTime]"+r"(p_predTime)
: :
...@@ -261,7 +262,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -261,7 +262,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
__asm__ volatile ( __asm__ volatile (
"sw $0, 0(%[p_predTime]) \n\t" "sw $0, 0(%[p_predTime]) \n\t"
"addiu %[p_predTime], %[p_predTime], 4 \n\t" PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t"
: [p_predTime]"+r"(p_predTime) : [p_predTime]"+r"(p_predTime)
: :
...@@ -315,9 +316,9 @@ static av_always_inline void fmul_and_reverse(float *dst, const float *src0, con ...@@ -315,9 +316,9 @@ static av_always_inline void fmul_and_reverse(float *dst, const float *src0, con
"swc1 %[temp9], 4(%[ptr1]) \n\t" "swc1 %[temp9], 4(%[ptr1]) \n\t"
"swc1 %[temp10], 8(%[ptr1]) \n\t" "swc1 %[temp10], 8(%[ptr1]) \n\t"
"swc1 %[temp11], 12(%[ptr1]) \n\t" "swc1 %[temp11], 12(%[ptr1]) \n\t"
"addiu %[ptr1], %[ptr1], 16 \n\t" PTR_ADDIU "%[ptr1], %[ptr1], 16 \n\t"
"addiu %[ptr2], %[ptr2], -16 \n\t" PTR_ADDIU "%[ptr2], %[ptr2], -16 \n\t"
"addiu %[ptr3], %[ptr3], -16 \n\t" PTR_ADDIU "%[ptr3], %[ptr3], -16 \n\t"
: [temp0]"=&f"(temp[0]), [temp1]"=&f"(temp[1]), : [temp0]"=&f"(temp[0]), [temp1]"=&f"(temp[1]),
[temp2]"=&f"(temp[2]), [temp3]"=&f"(temp[3]), [temp2]"=&f"(temp[2]), [temp3]"=&f"(temp[3]),
...@@ -358,7 +359,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -358,7 +359,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 20(%[p_saved_ltp]) \n\t" "sw $0, 20(%[p_saved_ltp]) \n\t"
"sw $0, 24(%[p_saved_ltp]) \n\t" "sw $0, 24(%[p_saved_ltp]) \n\t"
"sw $0, 28(%[p_saved_ltp]) \n\t" "sw $0, 28(%[p_saved_ltp]) \n\t"
"addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t" PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t"
"bne %[p_saved_ltp], %[loop_end1], 1b \n\t" "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
: [p_saved_ltp]"+r"(p_saved_ltp) : [p_saved_ltp]"+r"(p_saved_ltp)
...@@ -386,7 +387,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -386,7 +387,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"lw %[temp5], 20(%[src]) \n\t" "lw %[temp5], 20(%[src]) \n\t"
"lw %[temp6], 24(%[src]) \n\t" "lw %[temp6], 24(%[src]) \n\t"
"lw %[temp7], 28(%[src]) \n\t" "lw %[temp7], 28(%[src]) \n\t"
"addiu %[src], %[src], 32 \n\t" PTR_ADDIU "%[src], %[src], 32 \n\t"
"sw %[temp0], 0(%[dst]) \n\t" "sw %[temp0], 0(%[dst]) \n\t"
"sw %[temp1], 4(%[dst]) \n\t" "sw %[temp1], 4(%[dst]) \n\t"
"sw %[temp2], 8(%[dst]) \n\t" "sw %[temp2], 8(%[dst]) \n\t"
...@@ -404,7 +405,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) ...@@ -404,7 +405,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 2328(%[dst]) \n\t" "sw $0, 2328(%[dst]) \n\t"
"sw $0, 2332(%[dst]) \n\t" "sw $0, 2332(%[dst]) \n\t"
"bne %[src], %[loop_end], 1b \n\t" "bne %[src], %[loop_end], 1b \n\t"
" addiu %[dst], %[dst], 32 \n\t" PTR_ADDIU "%[dst], %[dst], 32 \n\t"
".set pop \n\t" ".set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#define AVCODEC_MIPS_AACDEC_FLOAT_H #define AVCODEC_MIPS_AACDEC_FLOAT_H
#include "libavcodec/aac.h" #include "libavcodec/aac.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU #if HAVE_INLINE_ASM && HAVE_MIPSFPU
static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx, static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
...@@ -77,7 +78,7 @@ static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx, ...@@ -77,7 +78,7 @@ static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
"lwxc1 %[temp1], %[temp4](%[v]) \n\t" "lwxc1 %[temp1], %[temp4](%[v]) \n\t"
"mul.s %[temp0], %[temp0], %[temp2] \n\t" "mul.s %[temp0], %[temp0], %[temp2] \n\t"
"mul.s %[temp1], %[temp1], %[temp2] \n\t" "mul.s %[temp1], %[temp1], %[temp2] \n\t"
"addiu %[ret], %[dst], 8 \n\t" PTR_ADDIU "%[ret], %[dst], 8 \n\t"
"swc1 %[temp0], 0(%[dst]) \n\t" "swc1 %[temp0], 0(%[dst]) \n\t"
"swc1 %[temp1], 4(%[dst]) \n\t" "swc1 %[temp1], 4(%[dst]) \n\t"
...@@ -115,7 +116,7 @@ static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx, ...@@ -115,7 +116,7 @@ static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx,
"mul.s %[temp6], %[temp6], %[temp4] \n\t" "mul.s %[temp6], %[temp6], %[temp4] \n\t"
"mul.s %[temp7], %[temp7], %[temp4] \n\t" "mul.s %[temp7], %[temp7], %[temp4] \n\t"
"mul.s %[temp8], %[temp8], %[temp4] \n\t" "mul.s %[temp8], %[temp8], %[temp4] \n\t"
"addiu %[ret], %[dst], 16 \n\t" PTR_ADDIU "%[ret], %[dst], 16 \n\t"
"swc1 %[temp5], 0(%[dst]) \n\t" "swc1 %[temp5], 0(%[dst]) \n\t"
"swc1 %[temp6], 4(%[dst]) \n\t" "swc1 %[temp6], 4(%[dst]) \n\t"
"swc1 %[temp7], 8(%[dst]) \n\t" "swc1 %[temp7], 8(%[dst]) \n\t"
...@@ -157,7 +158,7 @@ static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx, ...@@ -157,7 +158,7 @@ static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx,
"mtc1 %[temp4], %[temp7] \n\t" "mtc1 %[temp4], %[temp7] \n\t"
"mul.s %[temp8], %[temp8], %[temp6] \n\t" "mul.s %[temp8], %[temp8], %[temp6] \n\t"
"mul.s %[temp9], %[temp9], %[temp7] \n\t" "mul.s %[temp9], %[temp9], %[temp7] \n\t"
"addiu %[ret], %[dst], 8 \n\t" PTR_ADDIU "%[ret], %[dst], 8 \n\t"
"swc1 %[temp8], 0(%[dst]) \n\t" "swc1 %[temp8], 0(%[dst]) \n\t"
"swc1 %[temp9], 4(%[dst]) \n\t" "swc1 %[temp9], 4(%[dst]) \n\t"
...@@ -220,7 +221,7 @@ static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx, ...@@ -220,7 +221,7 @@ static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
"mul.s %[temp11], %[temp11], %[temp15] \n\t" "mul.s %[temp11], %[temp11], %[temp15] \n\t"
"mul.s %[temp12], %[temp12], %[temp16] \n\t" "mul.s %[temp12], %[temp12], %[temp16] \n\t"
"mul.s %[temp13], %[temp13], %[temp17] \n\t" "mul.s %[temp13], %[temp13], %[temp17] \n\t"
"addiu %[ret], %[dst], 16 \n\t" PTR_ADDIU "%[ret], %[dst], 16 \n\t"
"swc1 %[temp10], 0(%[dst]) \n\t" "swc1 %[temp10], 0(%[dst]) \n\t"
"swc1 %[temp11], 4(%[dst]) \n\t" "swc1 %[temp11], 4(%[dst]) \n\t"
"swc1 %[temp12], 8(%[dst]) \n\t" "swc1 %[temp12], 8(%[dst]) \n\t"
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include "config.h" #include "config.h"
#include "libavcodec/aacpsdsp.h" #include "libavcodec/aacpsdsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64], static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
...@@ -86,8 +87,8 @@ static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][6 ...@@ -86,8 +87,8 @@ static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][6
"sw %[temp5], 20(%[out1]) \n\t" "sw %[temp5], 20(%[out1]) \n\t"
"sw %[temp6], 24(%[out1]) \n\t" "sw %[temp6], 24(%[out1]) \n\t"
"sw %[temp7], 28(%[out1]) \n\t" "sw %[temp7], 28(%[out1]) \n\t"
"addiu %[out1], %[out1], 32 \n\t" PTR_ADDIU "%[out1], %[out1], 32 \n\t"
"addiu %[L1], %[L1], 1024 \n\t" PTR_ADDIU "%[L1], %[L1], 1024 \n\t"
"bne %[out1], %[j], 1b \n\t" "bne %[out1], %[j], 1b \n\t"
: [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j), : [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
...@@ -128,10 +129,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64], ...@@ -128,10 +129,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t" "lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t" "lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t" "lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], 1024 \n\t" PTR_ADDIU "%[out1], %[out1], 1024 \n\t"
"addiu %[out2], %[out2], 1024 \n\t" PTR_ADDIU "%[out2], %[out2], 1024 \n\t"
"addiu %[in1], %[in1], 32 \n\t" PTR_ADDIU "%[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t" PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], -1024(%[out1]) \n\t" "sw %[temp0], -1024(%[out1]) \n\t"
"sw %[temp1], -1024(%[out2]) \n\t" "sw %[temp1], -1024(%[out2]) \n\t"
"sw %[temp2], -768(%[out1]) \n\t" "sw %[temp2], -768(%[out1]) \n\t"
...@@ -161,10 +162,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64], ...@@ -161,10 +162,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t" "lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t" "lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t" "lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], -7164 \n\t" PTR_ADDIU "%[out1], %[out1], -7164 \n\t"
"addiu %[out2], %[out2], -7164 \n\t" PTR_ADDIU "%[out2], %[out2], -7164 \n\t"
"addiu %[in1], %[in1], 32 \n\t" PTR_ADDIU "%[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t" PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], 7164(%[out1]) \n\t" "sw %[temp0], 7164(%[out1]) \n\t"
"sw %[temp1], 7164(%[out2]) \n\t" "sw %[temp1], 7164(%[out2]) \n\t"
"sw %[temp2], 7420(%[out1]) \n\t" "sw %[temp2], 7420(%[out1]) \n\t"
...@@ -226,8 +227,8 @@ static void ps_add_squares_mips(float *dst, const float (*src)[2], int n) ...@@ -226,8 +227,8 @@ static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
"swc1 %[temp2], 4(%[dst0]) \n\t" "swc1 %[temp2], 4(%[dst0]) \n\t"
"swc1 %[temp4], 8(%[dst0]) \n\t" "swc1 %[temp4], 8(%[dst0]) \n\t"
"swc1 %[temp6], 12(%[dst0]) \n\t" "swc1 %[temp6], 12(%[dst0]) \n\t"
"addiu %[dst0], %[dst0], 16 \n\t" PTR_ADDIU "%[dst0], %[dst0], 16 \n\t"
"addiu %[src0], %[src0], 32 \n\t" PTR_ADDIU "%[src0], %[src0], 32 \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
...@@ -257,14 +258,14 @@ static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *sr ...@@ -257,14 +258,14 @@ static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *sr
"lwc1 %[temp2], 0(%[p_s1]) \n\t" "lwc1 %[temp2], 0(%[p_s1]) \n\t"
"lwc1 %[temp0], 0(%[p_s0]) \n\t" "lwc1 %[temp0], 0(%[p_s0]) \n\t"
"lwc1 %[temp1], 4(%[p_s0]) \n\t" "lwc1 %[temp1], 4(%[p_s0]) \n\t"
"addiu %[p_d], %[p_d], 8 \n\t" PTR_ADDIU "%[p_d], %[p_d], 8 \n\t"
"mul.s %[temp0], %[temp0], %[temp2] \n\t" "mul.s %[temp0], %[temp0], %[temp2] \n\t"
"mul.s %[temp1], %[temp1], %[temp2] \n\t" "mul.s %[temp1], %[temp1], %[temp2] \n\t"
"addiu %[p_s0], %[p_s0], 8 \n\t" PTR_ADDIU "%[p_s0], %[p_s0], 8 \n\t"
"swc1 %[temp0], -8(%[p_d]) \n\t" "swc1 %[temp0], -8(%[p_d]) \n\t"
"swc1 %[temp1], -4(%[p_d]) \n\t" "swc1 %[temp1], -4(%[p_d]) \n\t"
"bne %[p_s1], %[end], 1b \n\t" "bne %[p_s1], %[end], 1b \n\t"
" addiu %[p_s1], %[p_s1], 4 \n\t" PTR_ADDIU "%[p_s1], %[p_s1], 4 \n\t"
".set pop \n\t" ".set pop \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
...@@ -355,13 +356,13 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2], ...@@ -355,13 +356,13 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
"mul.s %[temp1], %[ag2], %[temp3] \n\t" "mul.s %[temp1], %[ag2], %[temp3] \n\t"
"lwc1 %[temp4], 0(%[p_t_gain]) \n\t" "lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
"sub.s %[temp0], %[temp8], %[temp0] \n\t" "sub.s %[temp0], %[temp8], %[temp0] \n\t"
"addiu %[p_ap_delay], %[p_ap_delay], 8 \n\t" PTR_ADDIU "%[p_ap_delay], %[p_ap_delay], 8 \n\t"
"sub.s %[temp1], %[temp9], %[temp1] \n\t" "sub.s %[temp1], %[temp9], %[temp1] \n\t"
"addiu %[p_t_gain], %[p_t_gain], 4 \n\t" PTR_ADDIU "%[p_t_gain], %[p_t_gain], 4 \n\t"
"madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t" "madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
"addiu %[p_delay], %[p_delay], 8 \n\t" PTR_ADDIU "%[p_delay], %[p_delay], 8 \n\t"
"madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t" "madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
"addiu %[p_out], %[p_out], 8 \n\t" PTR_ADDIU "%[p_out], %[p_out], 8 \n\t"
"mul.s %[temp5], %[temp4], %[temp0] \n\t" "mul.s %[temp5], %[temp4], %[temp0] \n\t"
"mul.s %[temp6], %[temp4], %[temp1] \n\t" "mul.s %[temp6], %[temp4], %[temp1] \n\t"
"swc1 %[temp2], 624(%[p_ap_delay]) \n\t" "swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
...@@ -414,9 +415,9 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2], ...@@ -414,9 +415,9 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
"add.s %[h3], %[h3], %[hs3] \n\t" "add.s %[h3], %[h3], %[hs3] \n\t"
"lwc1 %[r_im], 4(%[r]) \n\t" "lwc1 %[r_im], 4(%[r]) \n\t"
"mul.s %[temp0], %[h0], %[l_re] \n\t" "mul.s %[temp0], %[h0], %[l_re] \n\t"
"addiu %[l], %[l], 8 \n\t" PTR_ADDIU "%[l], %[l], 8 \n\t"
"mul.s %[temp2], %[h1], %[l_re] \n\t" "mul.s %[temp2], %[h1], %[l_re] \n\t"
"addiu %[r], %[r], 8 \n\t" PTR_ADDIU "%[r], %[r], 8 \n\t"
"madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t" "madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
"madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t" "madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
"mul.s %[temp1], %[h0], %[l_im] \n\t" "mul.s %[temp1], %[h0], %[l_im] \n\t"
......
...@@ -56,6 +56,8 @@ ...@@ -56,6 +56,8 @@
#ifndef AVCODEC_MIPS_AACPSY_MIPS_H #ifndef AVCODEC_MIPS_AACPSY_MIPS_H
#define AVCODEC_MIPS_AACPSY_MIPS_H #define AVCODEC_MIPS_AACPSY_MIPS_H
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 ) #if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands, static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
AacPsyChannel *pch, const uint8_t *band_sizes, AacPsyChannel *pch, const uint8_t *band_sizes,
...@@ -185,7 +187,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float ...@@ -185,7 +187,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
"madd.s %[sum1], %[sum1], $f8, %[coeff3] \n\t" "madd.s %[sum1], %[sum1], $f8, %[coeff3] \n\t"
"madd.s %[sum2], %[sum2], $f11, %[coeff3] \n\t" "madd.s %[sum2], %[sum2], $f11, %[coeff3] \n\t"
"lwc1 $f1, 36(%[fb]) \n\t" "lwc1 $f1, 36(%[fb]) \n\t"
"addiu %[fb], %[fb], 16 \n\t" PTR_ADDIU "%[fb], %[fb], 16 \n\t"
"madd.s %[sum4], %[sum4], $f0, %[coeff3] \n\t" "madd.s %[sum4], %[sum4], $f0, %[coeff3] \n\t"
"madd.s %[sum3], %[sum3], $f1, %[coeff3] \n\t" "madd.s %[sum3], %[sum3], $f1, %[coeff3] \n\t"
"madd.s %[sum1], %[sum1], $f1, %[coeff4] \n\t" "madd.s %[sum1], %[sum1], $f1, %[coeff4] \n\t"
...@@ -207,7 +209,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float ...@@ -207,7 +209,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
"swc1 %[sum4], 12(%[hp]) \n\t" "swc1 %[sum4], 12(%[hp]) \n\t"
"swc1 %[sum3], 8(%[hp]) \n\t" "swc1 %[sum3], 8(%[hp]) \n\t"
"bne %[fb], %[fb_end], 1b \n\t" "bne %[fb], %[fb_end], 1b \n\t"
" addiu %[hp], %[hp], 16 \n\t" PTR_ADDIU "%[hp], %[hp], 16 \n\t"
".set pop \n\t" ".set pop \n\t"
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include "libavcodec/aac.h" #include "libavcodec/aac.h"
#include "libavcodec/aacsbr.h" #include "libavcodec/aacsbr.h"
#include "libavutil/mips/asmdefs.h"
#define ENVELOPE_ADJUSTMENT_OFFSET 2 #define ENVELOPE_ADJUSTMENT_OFFSET 2
...@@ -81,9 +82,9 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr, ...@@ -81,9 +82,9 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw $0, 20(%[p_x1_low]) \n\t" "sw $0, 20(%[p_x1_low]) \n\t"
"sw $0, 24(%[p_x1_low]) \n\t" "sw $0, 24(%[p_x1_low]) \n\t"
"sw $0, 28(%[p_x1_low]) \n\t" "sw $0, 28(%[p_x1_low]) \n\t"
"addiu %[p_x1_low], %[p_x1_low], 32 \n\t" PTR_ADDIU "%[p_x1_low],%[p_x1_low], 32 \n\t"
"bne %[p_x1_low], %[loop_end], 1b \n\t" "bne %[p_x1_low], %[loop_end], 1b \n\t"
"addiu %[p_x1_low], %[p_x1_low], -10240 \n\t" PTR_ADDIU "%[p_x1_low],%[p_x1_low], -10240 \n\t"
: [p_x1_low]"+r"(p_x1_low) : [p_x1_low]"+r"(p_x1_low)
: [loop_end]"r"(loop_end) : [loop_end]"r"(loop_end)
...@@ -110,8 +111,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr, ...@@ -110,8 +111,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw %[temp5], 20(%[p_x_low]) \n\t" "sw %[temp5], 20(%[p_x_low]) \n\t"
"sw %[temp6], 24(%[p_x_low]) \n\t" "sw %[temp6], 24(%[p_x_low]) \n\t"
"sw %[temp7], 28(%[p_x_low]) \n\t" "sw %[temp7], 28(%[p_x_low]) \n\t"
"addiu %[p_x_low], %[p_x_low], 32 \n\t" PTR_ADDIU "%[p_x_low], %[p_x_low], 32 \n\t"
"addiu %[p_w], %[p_w], 1024 \n\t" PTR_ADDIU "%[p_w], %[p_w], 1024 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
...@@ -147,8 +148,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr, ...@@ -147,8 +148,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw %[temp5], 20(%[p_x1_low]) \n\t" "sw %[temp5], 20(%[p_x1_low]) \n\t"
"sw %[temp6], 24(%[p_x1_low]) \n\t" "sw %[temp6], 24(%[p_x1_low]) \n\t"
"sw %[temp7], 28(%[p_x1_low]) \n\t" "sw %[temp7], 28(%[p_x1_low]) \n\t"
"addiu %[p_x1_low], %[p_x1_low], 32 \n\t" PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32 \n\t"
"addiu %[p_w1], %[p_w1], 1024 \n\t" PTR_ADDIU "%[p_w1], %[p_w1], 1024 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
...@@ -188,9 +189,9 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], ...@@ -188,9 +189,9 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"sw $0, 20(%[x1]) \n\t" "sw $0, 20(%[x1]) \n\t"
"sw $0, 24(%[x1]) \n\t" "sw $0, 24(%[x1]) \n\t"
"sw $0, 28(%[x1]) \n\t" "sw $0, 28(%[x1]) \n\t"
"addiu %[x1], %[x1], 32 \n\t" PTR_ADDIU "%[x1],%[x1], 32 \n\t"
"bne %[x1], %[j], 1b \n\t" "bne %[x1], %[j], 1b \n\t"
"addiu %[x1], %[x1], -19456 \n\t" PTR_ADDIU "%[x1],%[x1], -19456 \n\t"
: [x1]"+r"(x1) : [x1]"+r"(x1)
: [j]"r"(j) : [j]"r"(j)
...@@ -210,8 +211,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], ...@@ -210,8 +211,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[X_low1]) \n\t" "lw %[temp1], 4(%[X_low1]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t" "sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t" "sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t" PTR_ADDIU "%[x1], %[x1], 256 \n\t"
"addiu %[X_low1], %[X_low1], 8 \n\t" PTR_ADDIU "%[X_low1], %[X_low1], 8 \n\t"
"addiu %[i], %[i], 1 \n\t" "addiu %[i], %[i], 1 \n\t"
"bne %[i], %[i_Temp], 2b \n\t" "bne %[i], %[i_Temp], 2b \n\t"
...@@ -235,8 +236,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], ...@@ -235,8 +236,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[Y01]) \n\t" "lw %[temp1], 4(%[Y01]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t" "sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t" "sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t" PTR_ADDIU "%[x1], %[x1], 256 \n\t"
"addiu %[Y01], %[Y01], 512 \n\t" PTR_ADDIU "%[Y01], %[Y01], 512 \n\t"
"addiu %[i], %[i], 1 \n\t" "addiu %[i], %[i], 1 \n\t"
"bne %[i], %[i_Temp], 3b \n\t" "bne %[i], %[i_Temp], 3b \n\t"
...@@ -263,8 +264,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], ...@@ -263,8 +264,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[X_low1]) \n\t" "lw %[temp1], 4(%[X_low1]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t" "sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t" "sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t" PTR_ADDIU "%[x1], %[x1], 256 \n\t"
"addiu %[X_low1], %[X_low1], 8 \n\t" PTR_ADDIU "%[X_low1],%[X_low1], 8 \n\t"
"addiu %[i], %[i], 1 \n\t" "addiu %[i], %[i], 1 \n\t"
"bne %[i], %[temp3], 4b \n\t" "bne %[i], %[temp3], 4b \n\t"
...@@ -291,8 +292,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], ...@@ -291,8 +292,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[Y11]) \n\t" "lw %[temp1], 4(%[Y11]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t" "sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t" "sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t" PTR_ADDIU "%[x1], %[x1], 256 \n\t"
"addiu %[Y11], %[Y11], 512 \n\t" PTR_ADDIU "%[Y11], %[Y11], 512 \n\t"
"addiu %[i], %[i], 1 \n\t" "addiu %[i], %[i], 1 \n\t"
"bne %[i], %[temp2], 5b \n\t" "bne %[i], %[temp2], 5b \n\t"
...@@ -370,10 +371,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2], ...@@ -370,10 +371,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"sw %[temp2], 4(%[q_temp1]) \n\t" "sw %[temp2], 4(%[q_temp1]) \n\t"
"sw %[temp3], 8(%[q_temp1]) \n\t" "sw %[temp3], 8(%[q_temp1]) \n\t"
"sw %[temp4], 12(%[q_temp1]) \n\t" "sw %[temp4], 12(%[q_temp1]) \n\t"
"addiu %[pok], %[pok], 16 \n\t" PTR_ADDIU "%[pok], %[pok], 16 \n\t"
"addiu %[g_temp1], %[g_temp1], 16 \n\t" PTR_ADDIU "%[g_temp1], %[g_temp1], 16 \n\t"
"addiu %[pok1], %[pok1], 16 \n\t" PTR_ADDIU "%[pok1], %[pok1], 16 \n\t"
"addiu %[q_temp1], %[q_temp1], 16 \n\t" PTR_ADDIU "%[q_temp1], %[q_temp1], 16 \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
...@@ -390,10 +391,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2], ...@@ -390,10 +391,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"lw %[temp2], 0(%[pok1]) \n\t" "lw %[temp2], 0(%[pok1]) \n\t"
"sw %[temp1], 0(%[g_temp1]) \n\t" "sw %[temp1], 0(%[g_temp1]) \n\t"
"sw %[temp2], 0(%[q_temp1]) \n\t" "sw %[temp2], 0(%[q_temp1]) \n\t"
"addiu %[pok], %[pok], 4 \n\t" PTR_ADDIU "%[pok], %[pok], 4 \n\t"
"addiu %[g_temp1], %[g_temp1], 4 \n\t" PTR_ADDIU "%[g_temp1], %[g_temp1], 4 \n\t"
"addiu %[pok1], %[pok1], 4 \n\t" PTR_ADDIU "%[pok1], %[pok1], 4 \n\t"
"addiu %[q_temp1], %[q_temp1], 4 \n\t" PTR_ADDIU "%[q_temp1], %[q_temp1], 4 \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
...@@ -460,8 +461,8 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2], ...@@ -460,8 +461,8 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t" "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
"swc1 %[temp4], 0(%[out]) \n\t" "swc1 %[temp4], 0(%[out]) \n\t"
"swc1 %[temp5], 8(%[out]) \n\t" "swc1 %[temp5], 8(%[out]) \n\t"
"addiu %[in], %[in], 8 \n\t" PTR_ADDIU "%[in], %[in], 8 \n\t"
"addiu %[out], %[out], 16 \n\t" PTR_ADDIU "%[out], %[out], 16 \n\t"
: [temp0]"=&f" (temp0), [temp1]"=&f"(temp1), : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
[temp4]"=&f" (temp4), [temp5]"=&f"(temp5), [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "libavcodec/aac.h" #include "libavcodec/aac.h"
#include "libavcodec/sbr.h" #include "libavcodec/sbr.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct, static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
...@@ -89,8 +90,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct, ...@@ -89,8 +90,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
"sw %[temp5], 20(%[w0]) \n\t" "sw %[temp5], 20(%[w0]) \n\t"
"sw %[temp6], 24(%[w0]) \n\t" "sw %[temp6], 24(%[w0]) \n\t"
"sw %[temp7], 28(%[w0]) \n\t" "sw %[temp7], 28(%[w0]) \n\t"
"addiu %[w0], %[w0], 32 \n\t" PTR_ADDIU " %[w0], %[w0], 32 \n\t"
"addiu %[w1], %[w1], 32 \n\t" PTR_ADDIU " %[w1], %[w1], 32 \n\t"
: [w0]"+r"(w0), [w1]"+r"(w1), : [w0]"+r"(w0), [w1]"+r"(w1),
[temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
...@@ -124,8 +125,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct, ...@@ -124,8 +125,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
"sw %[temp5], 20(%[w0]) \n\t" "sw %[temp5], 20(%[w0]) \n\t"
"sw %[temp6], 24(%[w0]) \n\t" "sw %[temp6], 24(%[w0]) \n\t"
"sw %[temp7], 28(%[w0]) \n\t" "sw %[temp7], 28(%[w0]) \n\t"
"addiu %[w0], %[w0], 32 \n\t" PTR_ADDIU " %[w0], %[w0], 32 \n\t"
"addiu %[w1], %[w1], 32 \n\t" PTR_ADDIU " %[w1], %[w1], 32 \n\t"
: [w0]"+r"(w0), [w1]"+r"(w1), : [w0]"+r"(w0), [w1]"+r"(w1),
[temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
...@@ -298,13 +299,13 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct, ...@@ -298,13 +299,13 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
"lwc1 %[temp7], 2052(%[s0]) \n\t" "lwc1 %[temp7], 2052(%[s0]) \n\t"
"madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t" "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
"lwc1 %[temp8], 4104(%[v0]) \n\t" "lwc1 %[temp8], 4104(%[v0]) \n\t"
"addiu %[dst], %[dst], 16 \n\t" PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t" "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
"lwc1 %[temp9], 2056(%[s0]) \n\t" "lwc1 %[temp9], 2056(%[s0]) \n\t"
"addiu %[s0], %[s0], 16 \n\t" PTR_ADDIU " %[s0], %[s0], 16 \n\t"
"madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t" "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
"lwc1 %[temp10], 4108(%[v0]) \n\t" "lwc1 %[temp10], 4108(%[v0]) \n\t"
"addiu %[v0], %[v0], 16 \n\t" PTR_ADDIU " %[v0], %[v0], 16 \n\t"
"madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t" "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
"lwc1 %[temp11], 2044(%[s0]) \n\t" "lwc1 %[temp11], 2044(%[s0]) \n\t"
"lwc1 %[temp12], 4848(%[v0]) \n\t" "lwc1 %[temp12], 4848(%[v0]) \n\t"
...@@ -445,7 +446,7 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct, ...@@ -445,7 +446,7 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
"madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t" "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
"lwc1 %[temp19], 2316(%[s0]) \n\t" "lwc1 %[temp19], 2316(%[s0]) \n\t"
"madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t" "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
"addiu %[dst], %[dst], 16 \n\t" PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t" "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
"madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t" "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
"madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t" "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
......
This diff is collapsed.
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include "config.h" #include "config.h"
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavcodec/acelp_filters.h" #include "libavcodec/acelp_filters.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void ff_acelp_interpolatef_mips(float *out, const float *in, static void ff_acelp_interpolatef_mips(float *out, const float *in,
...@@ -82,11 +83,11 @@ static void ff_acelp_interpolatef_mips(float *out, const float *in, ...@@ -82,11 +83,11 @@ static void ff_acelp_interpolatef_mips(float *out, const float *in,
"lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t" "lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t"
"lwc1 %[in_val_m], 0(%[p_in_m]) \n\t" "lwc1 %[in_val_m], 0(%[p_in_m]) \n\t"
"lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t" "lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t"
"addiu %[p_in_p], %[p_in_p], 4 \n\t" PTR_ADDIU "%[p_in_p], %[p_in_p], 4 \n\t"
"madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t" "madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t"
"addiu %[p_in_m], %[p_in_m], -4 \n\t" PTR_ADDIU "%[p_in_m], %[p_in_m], -4 \n\t"
"addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t" PTR_ADDU "%[p_filter_coeffs_p],%[p_filter_coeffs_p], %[prec] \n\t"
"addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t" PTR_ADDU "%[p_filter_coeffs_m],%[p_filter_coeffs_m], %[prec] \n\t"
"madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t" "madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t"
: [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m), : [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
...@@ -185,8 +186,8 @@ static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const floa ...@@ -185,8 +186,8 @@ static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const floa
"madd.s $f14, $f0, $f4, $f1 \n\t" "madd.s $f14, $f0, $f4, $f1 \n\t"
"madd.s $f14, $f14, $f5, $f13 \n\t" "madd.s $f14, $f14, $f5, $f13 \n\t"
"swc1 $f8, 24(%[out]) \n\t" "swc1 $f8, 24(%[out]) \n\t"
"addiu %[out], 32 \n\t" PTR_ADDIU "%[out], 32 \n\t"
"addiu %[in], 32 \n\t" PTR_ADDIU "%[in], 32 \n\t"
"addiu %[n], -8 \n\t" "addiu %[n], -8 \n\t"
"swc1 $f14, -4(%[out]) \n\t" "swc1 $f14, -4(%[out]) \n\t"
"bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t" "bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t"
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
*/ */
#include "config.h" #include "config.h"
#include "libavcodec/acelp_vectors.h" #include "libavcodec/acelp_vectors.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void ff_weighted_vector_sumf_mips( static void ff_weighted_vector_sumf_mips(
...@@ -75,11 +76,11 @@ static void ff_weighted_vector_sumf_mips( ...@@ -75,11 +76,11 @@ static void ff_weighted_vector_sumf_mips(
"mul.s $f5, %[weight_coeff_a], $f3 \n\t" "mul.s $f5, %[weight_coeff_a], $f3 \n\t"
"madd.s $f2, $f2, %[weight_coeff_b], $f1 \n\t" "madd.s $f2, $f2, %[weight_coeff_b], $f1 \n\t"
"madd.s $f5, $f5, %[weight_coeff_b], $f4 \n\t" "madd.s $f5, $f5, %[weight_coeff_b], $f4 \n\t"
"addiu %[in_a], 8 \n\t" PTR_ADDIU "%[in_a],8 \n\t"
"addiu %[in_b], 8 \n\t" PTR_ADDIU "%[in_b],8 \n\t"
"swc1 $f2, 0(%[out]) \n\t" "swc1 $f2, 0(%[out]) \n\t"
"swc1 $f5, 4(%[out]) \n\t" "swc1 $f5, 4(%[out]) \n\t"
"addiu %[out], 8 \n\t" PTR_ADDIU "%[out], 8 \n\t"
"bne %[in_a], %[a_end], ff_weighted_vector_sumf_madd%= \n\t" "bne %[in_a], %[a_end], ff_weighted_vector_sumf_madd%= \n\t"
"ff_weighted_vector_sumf_end%=: \n\t" "ff_weighted_vector_sumf_end%=: \n\t"
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavcodec/celp_filters.h" #include "libavcodec/celp_filters.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void ff_celp_lp_synthesis_filterf_mips(float *out, static void ff_celp_lp_synthesis_filterf_mips(float *out,
...@@ -118,8 +119,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out, ...@@ -118,8 +119,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out,
__asm__ volatile( __asm__ volatile(
"lwc1 %[old_out3], -20(%[p_out]) \n\t" "lwc1 %[old_out3], -20(%[p_out]) \n\t"
"lwc1 $f5, 16(%[p_filter_coeffs]) \n\t" "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
"addiu %[p_out], -8 \n\t" PTR_ADDIU "%[p_out], -8 \n\t"
"addiu %[p_filter_coeffs], 8 \n\t" PTR_ADDIU "%[p_filter_coeffs], 8 \n\t"
"nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t" "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
"nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t" "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
"lwc1 $f4, 12(%[p_filter_coeffs]) \n\t" "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
...@@ -181,8 +182,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out, ...@@ -181,8 +182,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out,
__asm__ volatile( __asm__ volatile(
"lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t" "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
"lwc1 %[out_val_i], -4(%[p_out]) \n\t" "lwc1 %[out_val_i], -4(%[p_out]) \n\t"
"addiu %[p_filter_coeffs], 4 \n\t" PTR_ADDIU "%[p_filter_coeffs], 4 \n\t"
"addiu %[p_out], -4 \n\t" PTR_ADDIU "%[p_out], -4 \n\t"
"nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t" "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
: [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val), : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
...@@ -245,8 +246,8 @@ static void ff_celp_lp_zero_synthesis_filterf_mips(float *out, ...@@ -245,8 +246,8 @@ static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
"madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t" "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
"lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t" "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
"lwc1 $f7, -8(%[p_in]) \n\t" "lwc1 $f7, -8(%[p_in]) \n\t"
"addiu %[p_filter_coeffs], 8 \n\t" PTR_ADDIU "%[p_filter_coeffs], 8 \n\t"
"addiu %[p_in], -8 \n\t" PTR_ADDIU "%[p_in], -8 \n\t"
"madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t" "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
"madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t" "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
"madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t" "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
*/ */
#include "config.h" #include "config.h"
#include "libavcodec/celp_math.h" #include "libavcodec/celp_math.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static float ff_dot_productf_mips(const float* a, const float* b, static float ff_dot_productf_mips(const float* a, const float* b,
...@@ -67,8 +68,8 @@ static float ff_dot_productf_mips(const float* a, const float* b, ...@@ -67,8 +68,8 @@ static float ff_dot_productf_mips(const float* a, const float* b,
"ff_dot_productf_madd%=: \n\t" "ff_dot_productf_madd%=: \n\t"
"lwc1 $f2, 0(%[a]) \n\t" "lwc1 $f2, 0(%[a]) \n\t"
"lwc1 $f1, 0(%[b]) \n\t" "lwc1 $f1, 0(%[b]) \n\t"
"addiu %[a], %[a], 4 \n\t" PTR_ADDIU "%[a], %[a], 4 \n\t"
"addiu %[b], %[b], 4 \n\t" PTR_ADDIU "%[b], %[b], 4 \n\t"
"madd.s %[sum], %[sum], $f1, $f2 \n\t" "madd.s %[sum], %[sum], $f1, $f2 \n\t"
"bne %[a], %[a_end], ff_dot_productf_madd%= \n\t" "bne %[a], %[a_end], ff_dot_productf_madd%= \n\t"
"ff_dot_productf_end%=: \n\t" "ff_dot_productf_end%=: \n\t"
......
...@@ -55,6 +55,8 @@ ...@@ -55,6 +55,8 @@
#ifndef AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H #ifndef AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
#define AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H #define AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void compute_antialias_mips_float(MPADecodeContext *s, static void compute_antialias_mips_float(MPADecodeContext *s,
GranuleDef *g) GranuleDef *g)
...@@ -158,7 +160,7 @@ static void compute_antialias_mips_float(MPADecodeContext *s, ...@@ -158,7 +160,7 @@ static void compute_antialias_mips_float(MPADecodeContext *s,
"mul.s %[out4], %[in5], %[in7] \t\n" "mul.s %[out4], %[in5], %[in7] \t\n"
"swc1 %[out1], -7*4(%[ptr]) \t\n" "swc1 %[out1], -7*4(%[ptr]) \t\n"
"swc1 %[out2], 6*4(%[ptr]) \t\n" "swc1 %[out2], 6*4(%[ptr]) \t\n"
"addiu %[ptr], %[ptr], 72 \t\n" PTR_ADDIU "%[ptr],%[ptr], 72 \t\n"
"nmsub.s %[out3], %[out3], %[in7], %[in8] \t\n" "nmsub.s %[out3], %[out3], %[in7], %[in8] \t\n"
"madd.s %[out4], %[out4], %[in6], %[in8] \t\n" "madd.s %[out4], %[out4], %[in6], %[in8] \t\n"
"swc1 %[out3], -26*4(%[ptr]) \t\n" "swc1 %[out3], -26*4(%[ptr]) \t\n"
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "config.h" #include "config.h"
#include "libavcodec/fft.h" #include "libavcodec/fft.h"
#include "libavcodec/fft_table.h" #include "libavcodec/fft_table.h"
#include "libavutil/mips/asmdefs.h"
/** /**
* FFT transform * FFT transform
...@@ -368,14 +369,14 @@ static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample ...@@ -368,14 +369,14 @@ static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample
"mul.s %[temp11], %[temp5], %[temp6] \t\n" "mul.s %[temp11], %[temp5], %[temp6] \t\n"
"mul.s %[temp12], %[temp5], %[temp7] \t\n" "mul.s %[temp12], %[temp5], %[temp7] \t\n"
"lwc1 %[temp8], 0(%[in3]) \t\n" "lwc1 %[temp8], 0(%[in3]) \t\n"
"addiu %[tcos1], %[tcos1], 8 \t\n" PTR_ADDIU " %[tcos1], %[tcos1], 8 \t\n"
"addiu %[tsin1], %[tsin1], 8 \t\n" PTR_ADDIU " %[tsin1], %[tsin1], 8 \t\n"
"addiu %[in1], %[in1], 16 \t\n" PTR_ADDIU " %[in1], %[in1], 16 \t\n"
"nmsub.s %[temp11], %[temp11], %[temp8], %[temp7] \t\n" "nmsub.s %[temp11], %[temp11], %[temp8], %[temp7] \t\n"
"madd.s %[temp12], %[temp12], %[temp8], %[temp6] \t\n" "madd.s %[temp12], %[temp12], %[temp8], %[temp6] \t\n"
"addiu %[in2], %[in2], -16 \t\n" PTR_ADDIU " %[in2], %[in2], -16 \t\n"
"addiu %[in3], %[in3], 16 \t\n" PTR_ADDIU " %[in3], %[in3], 16 \t\n"
"addiu %[in4], %[in4], -16 \t\n" PTR_ADDIU " %[in4], %[in4], -16 \t\n"
: [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
......
...@@ -50,9 +50,9 @@ ...@@ -50,9 +50,9 @@
#include "config.h" #include "config.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/fmtconvert.h" #include "libavcodec/fmtconvert.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void int32_to_float_fmul_scalar_mips(float *dst, const int *src, static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
float mul, int len) float mul, int len)
{ {
...@@ -86,7 +86,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src, ...@@ -86,7 +86,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
"mtc1 %[rpom12], %[temp13] \n\t" "mtc1 %[rpom12], %[temp13] \n\t"
"mtc1 %[rpom22], %[temp15] \n\t" "mtc1 %[rpom22], %[temp15] \n\t"
"addiu %[src], 32 \n\t" PTR_ADDIU "%[src], 32 \n\t"
"cvt.s.w %[temp1], %[temp1] \n\t" "cvt.s.w %[temp1], %[temp1] \n\t"
"cvt.s.w %[temp3], %[temp3] \n\t" "cvt.s.w %[temp3], %[temp3] \n\t"
"cvt.s.w %[temp5], %[temp5] \n\t" "cvt.s.w %[temp5], %[temp5] \n\t"
...@@ -116,7 +116,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src, ...@@ -116,7 +116,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
"swc1 %[temp11], 20(%[dst]) \n\t" /*dst[i+5] = src[i+5] * mul;*/ "swc1 %[temp11], 20(%[dst]) \n\t" /*dst[i+5] = src[i+5] * mul;*/
"swc1 %[temp13], 24(%[dst]) \n\t" /*dst[i+6] = src[i+6] * mul;*/ "swc1 %[temp13], 24(%[dst]) \n\t" /*dst[i+6] = src[i+6] * mul;*/
"swc1 %[temp15], 28(%[dst]) \n\t" /*dst[i+7] = src[i+7] * mul;*/ "swc1 %[temp15], 28(%[dst]) \n\t" /*dst[i+7] = src[i+7] * mul;*/
"addiu %[dst], 32 \n\t" PTR_ADDIU "%[dst], 32 \n\t"
"bne %[src], %[src_end], i32tf_lp%= \n\t" "bne %[src], %[src_end], i32tf_lp%= \n\t"
: [temp1]"=&f"(temp1), [temp11]"=&f"(temp11), : [temp1]"=&f"(temp1), [temp11]"=&f"(temp11),
[temp13]"=&f"(temp13), [temp15]"=&f"(temp15), [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
......
...@@ -55,6 +55,8 @@ ...@@ -55,6 +55,8 @@
#define AVCODEC_LSP_MIPS_H #define AVCODEC_LSP_MIPS_H
#if HAVE_MIPSFPU && HAVE_INLINE_ASM #if HAVE_MIPSFPU && HAVE_INLINE_ASM
#include "libavutil/mips/asmdefs.h"
static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order) static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
{ {
int i, j = 0; int i, j = 0;
...@@ -73,7 +75,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int ...@@ -73,7 +75,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int
__asm__ volatile( __asm__ volatile(
"move %[p_f], %[p_fi] \n\t" "move %[p_f], %[p_fi] \n\t"
"add.d %[val], %[val], %[val] \n\t" "add.d %[val], %[val], %[val] \n\t"
"addiu %[p_fi], 8 \n\t" PTR_ADDIU "%[p_fi], 8 \n\t"
"ldc1 %[f_j_1], 0(%[p_f]) \n\t" "ldc1 %[f_j_1], 0(%[p_f]) \n\t"
"ldc1 %[f_j], 8(%[p_f]) \n\t" "ldc1 %[f_j], 8(%[p_f]) \n\t"
"neg.d %[val], %[val] \n\t" "neg.d %[val], %[val] \n\t"
...@@ -91,7 +93,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int ...@@ -91,7 +93,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int
"mov.d %[f_j_1], %[f_j_2] \n\t" "mov.d %[f_j_1], %[f_j_2] \n\t"
"ldc1 %[f_j_2], -16(%[p_f]) \n\t" "ldc1 %[f_j_2], -16(%[p_f]) \n\t"
"sdc1 %[tmp], 8(%[p_f]) \n\t" "sdc1 %[tmp], 8(%[p_f]) \n\t"
"addiu %[p_f], -8 \n\t" PTR_ADDIU "%[p_f], -8 \n\t"
"bgtz %[j], ff_lsp2polyf_lp_j%= \n\t" "bgtz %[j], ff_lsp2polyf_lp_j%= \n\t"
"ff_lsp2polyf_lp_j_end%=: \n\t" "ff_lsp2polyf_lp_j_end%=: \n\t"
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
#include <string.h> #include <string.h>
#include "libavutil/mips/asmdefs.h"
#include "libavcodec/mpegaudiodsp.h" #include "libavcodec/mpegaudiodsp.h"
static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window, static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
...@@ -152,7 +153,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo ...@@ -152,7 +153,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"extr.w %[sum1], $ac0, 24 \n\t" "extr.w %[sum1], $ac0, 24 \n\t"
"mflo %[temp3] \n\t" "mflo %[temp3] \n\t"
"addi %[w], %[w], 4 \n\t" PTR_ADDIU "%[w], %[w], 4 \n\t"
"and %[temp1], %[temp3], 0x00ffffff \n\t" "and %[temp1], %[temp3], 0x00ffffff \n\t"
"slt %[temp2], %[sum1], %[min_asm] \n\t" "slt %[temp2], %[sum1], %[min_asm] \n\t"
"movn %[sum1], %[min_asm], %[temp2] \n\t" "movn %[sum1], %[min_asm], %[temp2] \n\t"
...@@ -180,7 +181,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo ...@@ -180,7 +181,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"mtlo $0, $ac1 \n\t" "mtlo $0, $ac1 \n\t"
"mthi $0 \n\t" "mthi $0 \n\t"
"mtlo %[temp1] \n\t" "mtlo %[temp1] \n\t"
"addi %[p_temp1], %[p_temp1], 4 \n\t" PTR_ADDIU "%[p_temp1], %[p_temp1], 4 \n\t"
"lw %[w_asm], 0(%[w]) \n\t" "lw %[w_asm], 0(%[w]) \n\t"
"lw %[p_asm], 0(%[p_temp1]) \n\t" "lw %[p_asm], 0(%[p_temp1]) \n\t"
"lw %[w2_asm], 0(%[w2]) \n\t" "lw %[w2_asm], 0(%[w2]) \n\t"
...@@ -221,7 +222,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo ...@@ -221,7 +222,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"msub $ac1, %[w2_asm], %[p_asm] \n\t" "msub $ac1, %[w2_asm], %[p_asm] \n\t"
"madd %[w_asm1], %[p_asm1] \n\t" "madd %[w_asm1], %[p_asm1] \n\t"
"msub $ac1, %[w2_asm1], %[p_asm1] \n\t" "msub $ac1, %[w2_asm1], %[p_asm1] \n\t"
"addi %[p_temp2], %[p_temp2], -4 \n\t" PTR_ADDIU "%[p_temp2], %[p_temp2], -4 \n\t"
"lw %[w_asm], 32*4(%[w]) \n\t" "lw %[w_asm], 32*4(%[w]) \n\t"
"lw %[p_asm], 0(%[p_temp2]) \n\t" "lw %[p_asm], 0(%[p_temp2]) \n\t"
"lw %[w2_asm], 32*4(%[w2]) \n\t" "lw %[w2_asm], 32*4(%[w2]) \n\t"
...@@ -262,8 +263,8 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo ...@@ -262,8 +263,8 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"msub %[w_asm1], %[p_asm1] \n\t" "msub %[w_asm1], %[p_asm1] \n\t"
"msub $ac1, %[w2_asm], %[p_asm] \n\t" "msub $ac1, %[w2_asm], %[p_asm] \n\t"
"msub $ac1, %[w2_asm1], %[p_asm1] \n\t" "msub $ac1, %[w2_asm1], %[p_asm1] \n\t"
"addi %[w], %[w], 4 \n\t" PTR_ADDIU "%[w], %[w], 4 \n\t"
"addi %[w2], %[w2], -4 \n\t" PTR_ADDIU "%[w2], %[w2], -4 \n\t"
"mflo %[temp2] \n\t" "mflo %[temp2] \n\t"
"extr.w %[sum1], $ac0, 24 \n\t" "extr.w %[sum1], $ac0, 24 \n\t"
"li %[temp3], 1 \n\t" "li %[temp3], 1 \n\t"
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include <string.h> #include <string.h>
#include "libavutil/mips/asmdefs.h"
#include "libavcodec/mpegaudiodsp.h" #include "libavcodec/mpegaudiodsp.h"
static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window, static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
...@@ -89,7 +90,7 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window, ...@@ -89,7 +90,7 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"sw $zero, 0(%[dither_state]) \t\n" "sw $zero, 0(%[dither_state]) \t\n"
"lwc1 %[in3], 64*4(%[window]) \t\n" "lwc1 %[in3], 64*4(%[window]) \t\n"
"lwc1 %[in4], 80*4(%[synth_buf]) \t\n" "lwc1 %[in4], 80*4(%[synth_buf]) \t\n"
"addu %[samples2], %[samples], %[t_sample] \t\n" PTR_ADDU "%[samples2],%[samples], %[t_sample] \t\n"
"madd.s %[sum], %[sum], %[in1], %[in2] \t\n" "madd.s %[sum], %[sum], %[in1], %[in2] \t\n"
"lwc1 %[in5], 128*4(%[window]) \t\n" "lwc1 %[in5], 128*4(%[window]) \t\n"
"lwc1 %[in6], 144*4(%[synth_buf]) \t\n" "lwc1 %[in6], 144*4(%[synth_buf]) \t\n"
...@@ -131,15 +132,15 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window, ...@@ -131,15 +132,15 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"lwc1 %[in7], 480*4(%[window]) \t\n" "lwc1 %[in7], 480*4(%[window]) \t\n"
"lwc1 %[in8], 496*4(%[synth_buf]) \t\n" "lwc1 %[in8], 496*4(%[synth_buf]) \t\n"
"nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n" "nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n"
"addu %[w], %[window], 4 \t\n" PTR_ADDU "%[w], %[window], 4 \t\n"
"nmsub.s %[sum], %[sum], %[in3], %[in4] \t\n" "nmsub.s %[sum], %[sum], %[in3], %[in4] \t\n"
"addu %[w2], %[window], 124 \t\n" PTR_ADDU "%[w2], %[window], 124 \t\n"
"addiu %[p], %[synth_buf], 68 \t\n" PTR_ADDIU "%[p], %[synth_buf], 68 \t\n"
"addiu %[p2], %[synth_buf], 188 \t\n" PTR_ADDIU "%[p2], %[synth_buf], 188 \t\n"
"nmsub.s %[sum], %[sum], %[in5], %[in6] \t\n" "nmsub.s %[sum], %[sum], %[in5], %[in6] \t\n"
"nmsub.s %[sum], %[sum], %[in7], %[in8] \t\n" "nmsub.s %[sum], %[sum], %[in7], %[in8] \t\n"
"swc1 %[sum], 0(%[samples]) \t\n" "swc1 %[sum], 0(%[samples]) \t\n"
"addu %[samples], %[samples], %[incr1] \t\n" PTR_ADDU "%[samples], %[samples], %[incr1] \t\n"
/* calculate two samples at the same time to avoid one memory /* calculate two samples at the same time to avoid one memory
access per two sample */ access per two sample */
...@@ -223,17 +224,17 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window, ...@@ -223,17 +224,17 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n" "nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n"
"lwc1 %[in6], 480*4(%[w2]) \t\n" "lwc1 %[in6], 480*4(%[w2]) \t\n"
"nmsub.s %[sum2], %[sum2], %[in2], %[in3] \t\n" "nmsub.s %[sum2], %[sum2], %[in2], %[in3] \t\n"
"addiu %[w], %[w], 4 \t\n" PTR_ADDIU "%[w], %[w], 4 \t\n"
"nmsub.s %[sum], %[sum], %[in4], %[in5] \t\n" "nmsub.s %[sum], %[sum], %[in4], %[in5] \t\n"
"addiu %[w2], %[w2], -4 \t\n" PTR_ADDIU "%[w2], %[w2], -4 \t\n"
"nmsub.s %[sum2], %[sum2], %[in5], %[in6] \t\n" "nmsub.s %[sum2], %[sum2], %[in5], %[in6] \t\n"
"addu %[j], %[j], 4 \t\n" "addu %[j], %[j], 4 \t\n"
"addiu %[p], 4 \t\n" PTR_ADDIU "%[p], 4 \t\n"
"swc1 %[sum], 0(%[samples]) \t\n" "swc1 %[sum], 0(%[samples]) \t\n"
"addiu %[p2], -4 \t\n" PTR_ADDIU "%[p2], -4 \t\n"
"swc1 %[sum2], 0(%[samples2]) \t\n" "swc1 %[sum2], 0(%[samples2]) \t\n"
"addu %[samples], %[samples], %[incr1] \t\n" PTR_ADDU "%[samples], %[samples], %[incr1] \t\n"
"subu %[samples2], %[samples2], %[incr1] \t\n" PTR_SUBU "%[samples2],%[samples2], %[incr1] \t\n"
"bne %[j], 64, ff_mpadsp_apply_window_loop%= \t\n" "bne %[j], 64, ff_mpadsp_apply_window_loop%= \t\n"
"lwc1 %[in1], 48*4(%[window]) \t\n" "lwc1 %[in1], 48*4(%[window]) \t\n"
......
This diff is collapsed.
/*
* Copyright (c) 2015 Imagination Technologies Ltd
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* MIPS assembly defines from sys/asm.h but rewritten for use with C inline
* assembly (rather than from within .s files).
*/
/*
 * Pointer-width dependent instruction mnemonics for C inline assembly.
 *
 * Every macro below expands to a string literal so that it can be
 * concatenated with the operand part of an asm template, e.g.
 *     PTR_ADDIU "%[src], %[src], 32 \n\t"
 * The mnemonic strings carry a trailing space so no extra separator is
 * needed between the macro and the operand string.
 *
 * NOTE(review): the guard is spelled AVCODEC_... although the header is
 * included as "libavutil/mips/asmdefs.h" by its users -- confirm whether
 * the guard should be AVUTIL_MIPS_ASMDEFS_H.
 */
#ifndef AVCODEC_MIPS_ASMDEFS_H
#define AVCODEC_MIPS_ASMDEFS_H
/* Included before _MIPS_SIM / _ABI64 are tested below; presumably the
 * source of those ABI selector macros -- TODO confirm. */
#include <sgidefs.h>
/* 64-bit ABI: registers holding pointers are manipulated with the
 * doubleword instruction forms. */
#if _MIPS_SIM == _ABI64
/* Presumably the pointer size in bytes (as an asm immediate) -- TODO confirm. */
# define PTRSIZE " 8 "
/* Presumably log2 of the pointer size (2^3 == 8), e.g. a shift amount for
 * scaling an index -- TODO confirm. */
# define PTRLOG " 3 "
/* Pointer + register. */
# define PTR_ADDU "daddu "
/* Pointer + immediate. */
# define PTR_ADDIU "daddiu "
/* Pointer - register. */
# define PTR_SUBU "dsubu "
/* Load one pointer-sized value from memory. */
# define PTR_L "ld "
/* Every other value of _MIPS_SIM: use the 32-bit word instruction forms. */
#else
/* Presumably the pointer size in bytes (as an asm immediate) -- TODO confirm. */
# define PTRSIZE " 4 "
/* Presumably log2 of the pointer size (2^2 == 4) -- TODO confirm. */
# define PTRLOG " 2 "
/* Pointer + register. */
# define PTR_ADDU "addu "
/* Pointer + immediate. */
# define PTR_ADDIU "addiu "
/* Pointer - register. */
# define PTR_SUBU "subu "
/* Load one pointer-sized value from memory. */
# define PTR_L "lw "
#endif
#endif /* AVCODEC_MIPS_ASMDEFS_H */
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
#include "config.h" #include "config.h"
#include "libavutil/float_dsp.h" #include "libavutil/float_dsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU #if HAVE_INLINE_ASM && HAVE_MIPSFPU
static void vector_fmul_mips(float *dst, const float *src0, const float *src1, static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
...@@ -90,9 +91,9 @@ static void vector_fmul_mips(float *dst, const float *src0, const float *src1, ...@@ -90,9 +91,9 @@ static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
"swc1 %[src0_1], 4(%[d]) \n\t" "swc1 %[src0_1], 4(%[d]) \n\t"
"swc1 %[src0_2], 8(%[d]) \n\t" "swc1 %[src0_2], 8(%[d]) \n\t"
"swc1 %[src0_3], 12(%[d]) \n\t" "swc1 %[src0_3], 12(%[d]) \n\t"
"addiu %[s0], %[s0], 16 \n\t" PTR_ADDIU "%[s0], %[s0], 16 \n\t"
"addiu %[s1], %[s1], 16 \n\t" PTR_ADDIU "%[s1], %[s1], 16 \n\t"
"addiu %[d], %[d], 16 \n\t" PTR_ADDIU "%[d], %[d], 16 \n\t"
"bne %[d], %[d_end], 1b \n\t" "bne %[d], %[d_end], 1b \n\t"
: [src0_0]"=&f"(src0_0), [src0_1]"=&f"(src0_1), : [src0_0]"=&f"(src0_0), [src0_1]"=&f"(src0_1),
...@@ -122,12 +123,12 @@ static void vector_fmul_scalar_mips(float *dst, const float *src, float mul, ...@@ -122,12 +123,12 @@ static void vector_fmul_scalar_mips(float *dst, const float *src, float mul,
"lwc1 %[temp1], 4(%[src]) \n\t" "lwc1 %[temp1], 4(%[src]) \n\t"
"lwc1 %[temp2], 8(%[src]) \n\t" "lwc1 %[temp2], 8(%[src]) \n\t"
"lwc1 %[temp3], 12(%[src]) \n\t" "lwc1 %[temp3], 12(%[src]) \n\t"
"addiu %[dst], %[dst], 16 \n\t" PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"mul.s %[temp0], %[temp0], %[mul] \n\t" "mul.s %[temp0], %[temp0], %[mul] \n\t"
"mul.s %[temp1], %[temp1], %[mul] \n\t" "mul.s %[temp1], %[temp1], %[mul] \n\t"
"mul.s %[temp2], %[temp2], %[mul] \n\t" "mul.s %[temp2], %[temp2], %[mul] \n\t"
"mul.s %[temp3], %[temp3], %[mul] \n\t" "mul.s %[temp3], %[temp3], %[mul] \n\t"
"addiu %[src], %[src], 16 \n\t" PTR_ADDIU "%[src], %[src], 16 \n\t"
"swc1 %[temp0], -16(%[dst]) \n\t" "swc1 %[temp0], -16(%[dst]) \n\t"
"swc1 %[temp1], -12(%[dst]) \n\t" "swc1 %[temp1], -12(%[dst]) \n\t"
"swc1 %[temp2], -8(%[dst]) \n\t" "swc1 %[temp2], -8(%[dst]) \n\t"
...@@ -251,8 +252,8 @@ static void butterflies_float_mips(float *av_restrict v1, float *av_restrict v2, ...@@ -251,8 +252,8 @@ static void butterflies_float_mips(float *av_restrict v1, float *av_restrict v2,
"add.s %[temp13], %[temp2], %[temp6] \n\t" "add.s %[temp13], %[temp2], %[temp6] \n\t"
"sub.s %[temp14], %[temp3], %[temp7] \n\t" "sub.s %[temp14], %[temp3], %[temp7] \n\t"
"add.s %[temp15], %[temp3], %[temp7] \n\t" "add.s %[temp15], %[temp3], %[temp7] \n\t"
"addiu %[v1], %[v1], 16 \n\t" PTR_ADDIU "%[v1], %[v1], 16 \n\t"
"addiu %[v2], %[v2], 16 \n\t" PTR_ADDIU "%[v2], %[v2], 16 \n\t"
"addiu %[pom], %[pom], -1 \n\t" "addiu %[pom], %[pom], -1 \n\t"
"lwc1 %[temp0], 0(%[v1]) \n\t" "lwc1 %[temp0], 0(%[v1]) \n\t"
"lwc1 %[temp1], 4(%[v1]) \n\t" "lwc1 %[temp1], 4(%[v1]) \n\t"
...@@ -321,9 +322,9 @@ static void vector_fmul_reverse_mips(float *dst, const float *src0, const float ...@@ -321,9 +322,9 @@ static void vector_fmul_reverse_mips(float *dst, const float *src0, const float
"mul.s %[temp2], %[temp3], %[temp2] \n\t" "mul.s %[temp2], %[temp3], %[temp2] \n\t"
"mul.s %[temp4], %[temp5], %[temp4] \n\t" "mul.s %[temp4], %[temp5], %[temp4] \n\t"
"mul.s %[temp6], %[temp7], %[temp6] \n\t" "mul.s %[temp6], %[temp7], %[temp6] \n\t"
"addiu %[src0], %[src0], 16 \n\t" PTR_ADDIU "%[src0], %[src0], 16 \n\t"
"addiu %[src1], %[src1], -16 \n\t" PTR_ADDIU "%[src1], %[src1], -16 \n\t"
"addiu %[dst], %[dst], 16 \n\t" PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"swc1 %[temp0], -16(%[dst]) \n\t" "swc1 %[temp0], -16(%[dst]) \n\t"
"swc1 %[temp2], -12(%[dst]) \n\t" "swc1 %[temp2], -12(%[dst]) \n\t"
"swc1 %[temp4], -8(%[dst]) \n\t" "swc1 %[temp4], -8(%[dst]) \n\t"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment