Commit 0ace686a authored by ZhouXiaoyong, committed by Michael Niedermayer

avcodec: optimize mathops for Loongson-3 v1

HAVE_LOONGSON is replaced by HAVE_LOONGSON3. Although Loongson-2E and 2F also
support the Loongson SIMD instructions, their decoding performance is low. We plan
to focus on optimizing for Loongson-3A1000, 3B1500 and 3A1500, and to modify the
configure file later to support the Loongson-2 series by adding HAVE_LOONGSON2.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 4b8a8194
...@@ -211,6 +211,8 @@ if ((y) < (x)) {\ ...@@ -211,6 +211,8 @@ if ((y) < (x)) {\
# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32)) # define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
#endif /* FASTDIV */ #endif /* FASTDIV */
#ifndef ff_sqrt
#define ff_sqrt ff_sqrt
static inline av_const unsigned int ff_sqrt(unsigned int a) static inline av_const unsigned int ff_sqrt(unsigned int a)
{ {
unsigned int b; unsigned int b;
...@@ -230,6 +232,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) ...@@ -230,6 +232,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
return b - (a < b * b); return b - (a < b * b);
} }
#endif
static inline int8_t ff_u8_to_s8(uint8_t a) static inline int8_t ff_u8_to_s8(uint8_t a)
{ {
......
/* /*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
* Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
* *
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
...@@ -27,14 +28,73 @@ ...@@ -27,14 +28,73 @@
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
#if HAVE_LOONGSON #if HAVE_LOONGSON3
#define MULH MULH
/**
 * Signed multiply-high: return the upper 32 bits of the 64-bit product a * b.
 *
 * The int operands are held sign-extended in 64-bit registers, so the low
 * 64 bits of the dmult result (LO) are the exact signed product.
 *
 * @param a first signed 32-bit factor
 * @param b second signed 32-bit factor
 * @return  (int)(((int64_t)a * b) >> 32)
 */
static inline av_const int MULH(int a, int b)
{
    int c;
    /* dsra (arithmetic shift), not dsrl: the result is typed int, and a
     * 32-bit value must stay sign-extended in its 64-bit register. A logical
     * shift would leave negative results zero-extended. */
    __asm__ ("dmult %1, %2      \n\t"
             "mflo  %0          \n\t"
             "dsra  %0, %0, 32  \n\t"
             : "=r"(c)
             : "r"(a), "r"(b)
             : "hi", "lo");
    return c;
}
#define UMULH UMULH
/**
 * Unsigned multiply-high: return the upper 32 bits of the 64-bit product
 * a * b.
 *
 * @param a first unsigned 32-bit factor
 * @param b second unsigned 32-bit factor
 * @return  (unsigned)(((uint64_t)a * b) >> 32)
 */
static inline av_const unsigned UMULH(unsigned a, unsigned b)
{
    /* Widen in C before entering the asm. A 32-bit value, even an unsigned
     * one, is held sign-extended in a 64-bit register, so passing a and b
     * directly would feed dmultu wrong operands whenever bit 31 is set.
     * As uint64_t operands they arrive zero-extended. */
    uint64_t x = a;
    uint64_t y = b;
    uint64_t high;

    __asm__ ("dmultu %1, %2      \n\t"
             "mflo   %0          \n\t"
             "dsrl   %0, %0, 32  \n\t"
             : "=r"(high)
             : "r"(x), "r"(y)
             : "hi", "lo");
    /* high < 2^32 here; the cast lets the compiler put it back into the
     * canonical 32-bit register form. */
    return (unsigned)high;
}
#define mid_pred mid_pred
/*
 * Branch-free median of three ints using conditional moves.
 *
 * Operands: %0 = t (starts as b), %1 = a (working copy, modified),
 * %2 = b, %3 = c. $8 is used as a scratch flag register and is listed
 * as clobbered. Returns the middle value of a, b and c.
 */
static inline av_const int mid_pred(int a, int b, int c)
{
int t = b;
__asm__ ("sgt $8, %1, %2 \n\t" /* $8 = (a > b) */
"movn %0, %1, $8 \n\t" /* if a > b: t = a, so t = max(a, b) */
"movn %1, %2, $8 \n\t" /* if a > b: a = b, so a = min(a, b) */
"sgt $8, %1, %3 \n\t" /* $8 = (min(a, b) > c) */
"movz %1, %3, $8 \n\t" /* if not: a = c, so a = max(min(a, b), c) */
"sgt $8, %0, %1 \n\t" /* $8 = (t > a) */
"movn %0, %1, $8 \n\t" /* if so: t = a, so t = min(t, a) = median */
: "+&r"(t),"+&r"(a) /* both are read and written; early-clobber keeps them apart from b and c */
: "r"(b),"r"(c)
: "$8");
return t;
}
#define ff_sqrt ff_sqrt
/**
 * Integer square root computed on the FPU.
 *
 * Returns floor(sqrt(a)), i.e. the largest b with b * b <= a, matching the
 * generic C ff_sqrt(), which ends with "return b - (a < b * b)".
 *
 * Corrections relative to the previous asm version:
 *  - ctc1/cfc1 access FPU *control* registers; data moves need (d)mtc1/mfc1.
 *  - the integer is now converted to floating point before the square root
 *    instead of having its bit pattern interpreted as a float.
 *  - double precision is used so every 32-bit input is represented exactly;
 *    the value is passed as a zero-extended 64-bit integer so inputs with
 *    bit 31 set are not treated as negative.
 *  - trunc.w.d rounds toward zero regardless of the current FCSR rounding
 *    mode, giving the floor rather than the nearest integer.
 *  - the FP register is allocated by the compiler instead of using $f0/$f2
 *    without declaring them clobbered.
 *
 * @param a value to take the square root of
 * @return  floor(sqrt(a))
 */
static inline av_const unsigned int ff_sqrt(unsigned int a)
{
    uint64_t wide = a;   /* zero-extended copy for the 64-bit int->double conversion */
    double   scratch;    /* compiler-allocated FP work register */
    unsigned int b;

    __asm__ ("dmtc1     %2, %1  \n\t"   /* move 64-bit integer into the FPR   */
             "cvt.d.l   %1, %1  \n\t"   /* integer -> double (exact)          */
             "sqrt.d    %1, %1  \n\t"
             "trunc.w.d %1, %1  \n\t"   /* double -> int32, round toward zero */
             "mfc1      %0, %1  \n\t"
             : "=r"(b), "=&f"(scratch)
             : "r"(wide));
    return b;
}
/*
 * MAC64: multiply-accumulate into a 64-bit accumulator; returns d after the
 * product of a and b has been added to it.
 *
 * Side-by-side diff: the left column is the previous implementation, the
 * right column the new one.
 *  - Old: three-operand dmult.g with the scratch operand m (%1) as
 *    destination, followed by daddu.
 *  - New: dmult, then mflo to fetch the product from LO into m, then daddu;
 *    "hi" and "lo" are therefore declared as clobbered.
 * m is an early-clobber scratch output. The wrapper macro at the end assigns
 * the returned value back to d.
 */
static inline av_const int64_t MAC64(int64_t d, int a, int b) static inline av_const int64_t MAC64(int64_t d, int a, int b)
{ {
int64_t m; int64_t m;
__asm__ ("dmult.g %1, %2, %3 \n\t" __asm__ ("dmult %2, %3 \n\t"
"daddu %0, %0, %1 \n\t" "mflo %1 \n\t"
: "+r"(d), "=&r"(m) : "r"(a), "r"(b)); "daddu %0, %0, %1 \n\t"
: "+r"(d), "=&r"(m) : "r"(a), "r"(b)
: "hi", "lo");
return d; return d;
} }
#define MAC64(d, a, b) ((d) = MAC64(d, a, b)) #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
...@@ -42,14 +102,16 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b) ...@@ -42,14 +102,16 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b)
/*
 * MLS64: multiply-subtract from a 64-bit accumulator; returns d after the
 * product of a and b has been subtracted from it.
 *
 * Side-by-side diff: the left column is the previous implementation, the
 * right column the new one.
 *  - Old: three-operand dmult.g with the scratch operand m (%1) as
 *    destination, followed by dsubu.
 *  - New: dmult, then mflo to fetch the product from LO into m, then dsubu;
 *    "hi" and "lo" are therefore declared as clobbered.
 * m is an early-clobber scratch output. The wrapper macro at the end assigns
 * the returned value back to d.
 */
static inline av_const int64_t MLS64(int64_t d, int a, int b) static inline av_const int64_t MLS64(int64_t d, int a, int b)
{ {
int64_t m; int64_t m;
__asm__ ("dmult.g %1, %2, %3 \n\t" __asm__ ("dmult %2, %3 \n\t"
"dsubu %0, %0, %1 \n\t" "mflo %1 \n\t"
: "+r"(d), "=&r"(m) : "r"(a), "r"(b)); "dsubu %0, %0, %1 \n\t"
: "+r"(d), "=&r"(m) : "r"(a), "r"(b)
: "hi", "lo");
return d; return d;
} }
#define MLS64(d, a, b) ((d) = MLS64(d, a, b)) #define MLS64(d, a, b) ((d) = MLS64(d, a, b))
#endif #endif /* HAVE_LOONGSON3 */
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment