Commit fdac5ff6 authored by 周晓勇, committed by Michael Niedermayer

avcodec: loongson relocate constants of idctdsp and h264pred

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
parent 4bd61933
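
The change is mechanical throughout: 64-bit patterns that each asm block used to rebuild with a `dli` immediate (plus the two file-local `DECLARE_ASM_CONST`s in the idctdsp code) now live once in the shared MIPS constants table, are declared extern in constants.h, and reach the asm as ordinary input operands. A minimal sketch of the before/after shape, with hypothetical helper names and a plain `__attribute__((aligned(8)))` standing in for FFmpeg's `DECLARE_ALIGNED`; it only assembles for MIPS64/Loongson targets:

```c
/* Sketch only (not FFmpeg code): the shape of the change applied to each
 * function in this commit.  Helper names are illustrative. */
#include <stdint.h>

/* Before: every asm block materialized the pattern itself. */
static void old_style(uint64_t *dst)
{
    __asm__ volatile (
        "dli $7, 0x0101010101010101 \r\n"   /* rebuild constant in a temp */
        "sd  $7, 0(%0)              \r\n"   /* ... use it ...             */
        :: "r"(dst)
        : "$7", "memory"
    );
}

/* After: the constant is defined once (8-byte aligned, as DECLARE_ALIGNED
 * does in constants.c) and arrives as an input operand, so the dli and the
 * extra clobber disappear. */
static const uint64_t pb_1 __attribute__((aligned(8))) = 0x0101010101010101ULL;

static void new_style(uint64_t *dst)
{
    __asm__ volatile (
        "sd %1, 0(%0) \r\n"
        :: "r"(dst), "r"(pb_1)
        : "memory"
    );
}
```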
@@ -42,6 +42,8 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) = {0x0004000300020001ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) = {0x0008000700060005ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) = {0x0003000200010000ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) = {0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) = {0x000b000a00090008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) = {0x000f000e000d000cULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) = {0x0101010101010101ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) = {0x0303030303030303ULL};
@@ -51,3 +53,6 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) = {0xA1A1A1A1A1A1A1A1ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd) = {0x0004000400040004ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) = {0x0040004000400040ULL};
 DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) = {0x0020002000200020ULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL};
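
For reference, the new packed-word names follow the hex digits of the lanes they carry: `ff_pw_8tob` holds the halfwords 8..11 (0x8..0xb) and `ff_pw_ctof` holds 12..15 (0xc..0xf), extending the existing `ff_pw_0to3`/`ff_pw_4to7` ramps used by the plane predictor. A small host-side check (plain C, nothing FFmpeg-specific):

```c
#include <stdint.h>
#include <stdio.h>

/* Print the four 16-bit lanes of a packed-word constant, lowest lane first. */
static void print_lanes(const char *name, uint64_t pw)
{
    printf("%-10s:", name);
    for (int i = 0; i < 4; i++)
        printf(" %d", (int)(int16_t)(pw >> (16 * i)));
    printf("\n");
}

int main(void)
{
    print_lanes("ff_pw_8tob", 0x000b000a00090008ULL);  /*  8  9 10 11 */
    print_lanes("ff_pw_ctof", 0x000f000e000d000cULL);  /* 12 13 14 15 */
    return 0;
}
```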
@@ -43,6 +43,8 @@ extern const uint64_t ff_pw_1to4;
 extern const uint64_t ff_pw_5to8;
 extern const uint64_t ff_pw_0to3;
 extern const uint64_t ff_pw_4to7;
+extern const uint64_t ff_pw_8tob;
+extern const uint64_t ff_pw_ctof;
 extern const uint64_t ff_pb_1;
 extern const uint64_t ff_pb_3;
@@ -53,4 +55,7 @@ extern const uint64_t ff_rnd;
 extern const uint64_t ff_rnd2;
 extern const uint64_t ff_rnd3;

+extern const uint64_t ff_wm1010;
+extern const uint64_t ff_d40000;
+
 #endif /* AVCODEC_MIPS_CONSTANTS_H */
@@ -23,6 +23,7 @@
  */

 #include "h264pred_mips.h"
+#include "constants.h"

 void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
@@ -50,14 +51,12 @@ void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a \r\n"
 "daddiu $2, %0, -1 \r\n"
 "daddu $3, %0, $0 \r\n"
 "dli $6, 0x10 \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "1: \r\n"
 "lbu $4, 0($2) \r\n"
-"dmul $5, $4, $7 \r\n"
+"dmul $5, $4, %2 \r\n"
 "sdl $5, 7($3) \r\n"
 "sdr $5, 0($3) \r\n"
 "sdl $5, 15($3) \r\n"
@@ -66,7 +65,7 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 "daddu $3, %1 \r\n"
 "daddiu $6, -1 \r\n"
 "bnez $6, 1b \r\n"
-::"r"(src),"r"(stride)
+::"r"(src),"r"(stride),"r"(ff_pb_1)
 : "$2","$3","$4","$5","$6","memory"
 );
 }
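
The `dmul $5, $4, %2` line above relies on a standard splat trick: multiplying a byte value by `ff_pb_1` (0x0101010101010101) replicates it into all eight byte lanes, which the unaligned `sdl`/`sdr` pair then stores as one row. The same idea in plain C, with a made-up example value:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t ff_pb_1 = 0x0101010101010101ULL;
    uint8_t left = 0x7f;                         /* pixel loaded with lbu */
    uint64_t row = (uint64_t)left * ff_pb_1;     /* what dmul computes    */
    printf("%016llx\n", (unsigned long long)row); /* 7f7f7f7f7f7f7f7f     */
    return 0;
}
```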
@@ -74,7 +73,6 @@ void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a \r\n"
 "daddiu $2, %0, -1 \r\n"
 "dli $6, 0x10 \r\n"
 "xor $8, $8, $8 \r\n"
@@ -93,10 +91,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 "daddiu $2, $2, 1 \r\n"
 "daddiu $6, $6, -1 \r\n"
 "bnez $6, 2b \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "daddiu $8, $8, 0x10 \r\n"
 "dsra $8, 5 \r\n"
-"dmul $5, $8, $7 \r\n"
+"dmul $5, $8, %2 \r\n"
 "daddu $2, %0, $0 \r\n"
 "dli $6, 0x10 \r\n"
 "3: \r\n"
@@ -107,8 +104,8 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
 "daddu $2, $2, %1 \r\n"
 "daddiu $6, $6, -1 \r\n"
 "bnez $6, 3b \r\n"
-::"r"(src),"r"(stride)
-: "$2","$3","$4","$5","$6","$7","$8","memory"
+::"r"(src),"r"(stride),"r"(ff_pb_1)
+: "$2","$3","$4","$5","$6","$8","memory"
 );
 }
@@ -348,10 +345,8 @@ void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
 + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

 __asm__ volatile (
-".set arch=loongson3a \r\n"
-"dli $4, 0x01010101010101 \r\n"
 "daddu $2, %2, $0 \r\n"
-"dmul $3, $2, $4 \r\n"
+"dmul $3, $2, %3 \r\n"
 "xor $4, $4, $4 \r\n"
 "gsswx $3, 0(%0,$4) \r\n"
 "daddu $4, %1 \r\n"
@@ -360,7 +355,7 @@ void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
 "gsswx $3, 0(%0,$4) \r\n"
 "daddu $4, %1 \r\n"
 "gsswx $3, 0(%0,$4) \r\n"
-::"r"(src),"r"(stride),"r"(dc)
+::"r"(src),"r"(stride),"r"(dc),"r"(ff_pb_1)
 : "$2","$3","$4","memory"
 );
 }
@@ -387,21 +382,19 @@ void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a \r\n"
 "daddiu $2, %0, -1 \r\n"
 "daddu $3, %0, $0 \r\n"
 "dli $6, 0x8 \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "1: \r\n"
 "lbu $4, 0($2) \r\n"
-"dmul $5, $4, $7 \r\n"
+"dmul $5, $4, %2 \r\n"
 "sdl $5, 7($3) \r\n"
 "sdr $5, 0($3) \r\n"
 "daddu $2, %1 \r\n"
 "daddu $3, %1 \r\n"
 "daddiu $6, -1 \r\n"
 "bnez $6, 1b \r\n"
-::"r"(src),"r"(stride)
+::"r"(src),"r"(stride),"r"(ff_pb_1)
 : "$2","$3","$4","$5","$6","memory"
 );
 }
@@ -425,14 +418,10 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
 "punpcklbh $f2, $f2, $f8 \r\n"
 "punpcklbh $f4, $f4, $f8 \r\n"
 "punpcklbh $f6, $f6, $f8 \r\n"
-"dli $4, 0xfffbfffafff9fff8 \r\n"
-"dmtc1 $4, $f20 \r\n"
-"dli $4, 0xfffffffefffdfffc \r\n"
-"dmtc1 $4, $f22 \r\n"
-"dli $4, 0x0004000300020001 \r\n"
-"dmtc1 $4, $f24 \r\n"
-"dli $4, 0x0008000700060005 \r\n"
-"dmtc1 $4, $f26 \r\n"
+"dmtc1 %4, $f20 \r\n"
+"dmtc1 %5, $f22 \r\n"
+"dmtc1 %6, $f24 \r\n"
+"dmtc1 %7, $f26 \r\n"
 "pmullh $f0, $f0, $f20 \r\n"
 "pmullh $f2, $f2, $f22 \r\n"
 "pmullh $f4, $f4, $f24 \r\n"
@@ -576,17 +565,13 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
 "pshufh $f12, $f12, $f8 \r\n"
 "dli $4, 5 \r\n"
 "dmtc1 $4, $f14 \r\n"
-"dli $4, 0x0003000200010000 \r\n"
-"dmtc1 $4, $f2 \r\n"
+"dmtc1 %8, $f2 \r\n"
 "pmullh $f2, $f2, $f0 \r\n"
-"dli $4, 0x0007000600050004 \r\n"
-"dmtc1 $4, $f4 \r\n"
+"dmtc1 %9, $f4 \r\n"
 "pmullh $f4, $f4, $f0 \r\n"
-"dli $4, 0x000b000a00090008 \r\n"
-"dmtc1 $4, $f6 \r\n"
+"dmtc1 %10, $f6 \r\n"
 "pmullh $f6, $f6, $f0 \r\n"
-"dli $4, 0x000f000e000d000c \r\n"
-"dmtc1 $4, $f8 \r\n"
+"dmtc1 %11, $f8 \r\n"
 "pmullh $f8, $f8, $f0 \r\n"
 "daddu $3, %0, $0 \r\n"
 "dli $2, 16 \r\n"
@@ -609,7 +594,9 @@ static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
 "daddu $3, %1 \r\n"
 "daddiu $2, -1 \r\n"
 "bnez $2, 1b \r\n"
-::"r"(src),"r"(stride),"r"(svq3),"r"(rv40)
+::"r"(src),"r"(stride),"r"(svq3),"r"(rv40),
+"r"(ff_pw_m8tom5),"r"(ff_pw_m4tom1),"r"(ff_pw_1to4),"r"(ff_pw_5to8),
+"r"(ff_pw_0to3),"r"(ff_pw_4to7),"r"(ff_pw_8tob),"r"(ff_pw_ctof)
 : "$2","$3","$4","$5","$6","$7","$8","memory"
 );
 }
@@ -784,21 +771,19 @@ void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
 void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
 {
 __asm__ volatile (
-".set arch=loongson3a \r\n"
 "daddiu $2, %0, -1 \r\n"
 "daddu $3, %0, $0 \r\n"
 "dli $6, 0x10 \r\n"
-"dli $7, 0x0101010101010101 \r\n"
 "1: \r\n"
 "lbu $4, 0($2) \r\n"
-"dmul $5, $4, $7 \r\n"
+"dmul $5, $4, %2 \r\n"
 "sdl $5, 7($3) \r\n"
 "sdr $5, 0($3) \r\n"
 "daddu $2, %1 \r\n"
 "daddu $3, %1 \r\n"
 "daddiu $6, -1 \r\n"
 "bnez $6, 1b \r\n"
-::"r"(src),"r"(stride)
+::"r"(src),"r"(stride),"r"(ff_pb_1)
 : "$2","$3","$4","$5","$6","memory"
 );
 }
@@ -36,8 +36,6 @@
 #define ROW_SHIFT 11
 #define COL_SHIFT 20

-DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
 DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
@@ -815,7 +813,7 @@ static void simple_idct_mmi(int16_t *block)
 //IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
 "9: \n\t"
-::"r"(block),"r"(temp),"r"(coeffs),"m"(wm1010),"m"(d40000)
+::"r"(block),"r"(temp),"r"(coeffs),"m"(ff_wm1010),"m"(ff_d40000)
 : "$10","$11"
 );
 }
@@ -886,7 +884,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
 int64_t line_skip3;

 __asm__ volatile (
-"ldc1 $f0, %4 \n\t"
+"dmtc1 %4, $f0 \n\t"
 "daddu %1, %3, %3 \n\t"
 "ldc1 $f2, 0(%2) \n\t"
 "ldc1 $f10, 8(%2) \n\t"
@@ -933,7 +931,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
 "gssdxc1 $f6, 0(%0, $10) \n\t"
 "gssdxc1 $f8, 0(%0, %1) \n\t"
 : "+&r"(pixels),"=&r"(line_skip3)
-: "r"(block),"r"(line_skip),"m"(ff_pb_80)
+: "r"(block),"r"(line_skip),"r"(ff_pb_80)
 : "$10","memory"
 );
 }
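
The last two hunks go together: once `ff_pb_80` is passed with an "r" constraint instead of "m", it already sits in a general-purpose register, so the `ldc1` (load doubleword from memory into an FP register) becomes a `dmtc1` (move doubleword from a GPR into coprocessor 1). A stripped-down sketch of the two forms, with a local stand-in for `ff_pb_80`; MIPS64 only:

```c
#include <stdint.h>

static const uint64_t pb_80 __attribute__((aligned(8))) = 0x8080808080808080ULL;

static void via_memory(void)        /* old form: asm loads from memory      */
{
    __asm__ volatile ("ldc1 $f0, %0 \n\t" :: "m"(pb_80) : "$f0");
}

static void via_register(void)      /* new form: value handed over in a GPR */
{
    __asm__ volatile ("dmtc1 %0, $f0 \n\t" :: "r"(pb_80) : "$f0");
}
```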