Commit be449fca authored by Diego Pettenò's avatar Diego Pettenò

Convert asm keyword into __asm__.

Neither the asm() nor the __asm__() keyword is part of the C99
standard, but while GCC accepts the former in C89 syntax, it is not
accepted in C99 unless GNU extensions are turned on (with -fasm). The
latter form is accepted in any syntax as an extension (without
requiring further command-line options).

Sun Studio C99 compiler also does not accept asm() while accepting
__asm__(), albeit reporting warnings that it's not valid C99 syntax.

Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent a14b362f
...@@ -448,7 +448,7 @@ check_asm(){ ...@@ -448,7 +448,7 @@ check_asm(){
asm="$2" asm="$2"
shift 2 shift 2
check_cc "$@" <<EOF && enable $name || disable $name check_cc "$@" <<EOF && enable $name || disable $name
int foo(void){ asm volatile($asm); } int foo(void){ __asm__ volatile($asm); }
EOF EOF
} }
...@@ -1574,7 +1574,7 @@ if enabled x86; then ...@@ -1574,7 +1574,7 @@ if enabled x86; then
# base pointer is cleared in the inline assembly code. # base pointer is cleared in the inline assembly code.
check_exec_crash <<EOF && enable ebp_available check_exec_crash <<EOF && enable ebp_available
volatile int i=0; volatile int i=0;
asm volatile ( __asm__ volatile (
"xorl %%ebp, %%ebp" "xorl %%ebp, %%ebp"
::: "%ebp"); ::: "%ebp");
return i; return i;
...@@ -1934,7 +1934,7 @@ VHOOKCFLAGS="-fPIC" ...@@ -1934,7 +1934,7 @@ VHOOKCFLAGS="-fPIC"
# Find out if the .align argument is a power of two or not. # Find out if the .align argument is a power of two or not.
if test $asmalign_pot = "unknown"; then if test $asmalign_pot = "unknown"; then
disable asmalign_pot disable asmalign_pot
echo 'asm (".align 3");' | check_cc && enable asmalign_pot echo '__asm__ (".align 3");' | check_cc && enable asmalign_pot
fi fi
enabled_any $DECODER_LIST && enable decoders enabled_any $DECODER_LIST && enable decoders
......
...@@ -154,17 +154,17 @@ The minimum guaranteed alignment is written in the .h files, for example: ...@@ -154,17 +154,17 @@ The minimum guaranteed alignment is written in the .h files, for example:
General Tips: General Tips:
------------- -------------
Use asm loops like: Use asm loops like:
asm( __asm__(
"1: .... "1: ....
... ...
"jump_instruciton .... "jump_instruciton ....
Do not use C loops: Do not use C loops:
do{ do{
asm( __asm__(
... ...
}while() }while()
Use asm() instead of intrinsics. The latter requires a good optimizing compiler Use __asm__() instead of intrinsics. The latter requires a good optimizing compiler
which gcc is not. which gcc is not.
......
...@@ -105,21 +105,21 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); ...@@ -105,21 +105,21 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define implver __builtin_alpha_implver #define implver __builtin_alpha_implver
#define rpcc __builtin_alpha_rpcc #define rpcc __builtin_alpha_rpcc
#else #else
#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") #define prefetch(p) __asm__ volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") #define prefetch_en(p) __asm__ volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") #define prefetch_m(p) __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") #define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define extql(a, b) ({ uint64_t __r; __asm__ ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define extwl(a, b) ({ uint64_t __r; __asm__ ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define extqh(a, b) ({ uint64_t __r; __asm__ ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define zap(a, b) ({ uint64_t __r; __asm__ ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) #define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) #define amask(a) ({ uint64_t __r; __asm__ ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; })
#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) #define implver() ({ uint64_t __r; __asm__ ("implver %0" : "=r" (__r)); __r; })
#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) #define rpcc() ({ uint64_t __r; __asm__ volatile ("rpcc %0" : "=r" (__r)); __r; })
#endif #endif
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") #define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")
#if GNUC_PREREQ(3,3) && defined(__alpha_max__) #if GNUC_PREREQ(3,3) && defined(__alpha_max__)
#define minub8 __builtin_alpha_minub8 #define minub8 __builtin_alpha_minub8
...@@ -136,19 +136,19 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); ...@@ -136,19 +136,19 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define unpkbl __builtin_alpha_unpkbl #define unpkbl __builtin_alpha_unpkbl
#define unpkbw __builtin_alpha_unpkbw #define unpkbw __builtin_alpha_unpkbw
#else #else
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) #define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) #define perr(a, b) ({ uint64_t __r; __asm__ (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #define pklb(a) ({ uint64_t __r; __asm__ (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #define pkwb(a) ({ uint64_t __r; __asm__ (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #define unpkbl(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #define unpkbw(a) ({ uint64_t __r; __asm__ (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; })
#endif #endif
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
...@@ -158,31 +158,31 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); ...@@ -158,31 +158,31 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#define ldl(p) (*(const int32_t *) (p)) #define ldl(p) (*(const int32_t *) (p))
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) #define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) #define ldq_u(a) __asm__ ("ldq_u %v0,0(%a0)", a)
#define uldq(a) (*(const __unaligned uint64_t *) (a)) #define uldq(a) (*(const __unaligned uint64_t *) (a))
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) #define cmpbge(a, b) __asm__ ("cmpbge %a0,%a1,%v0", a, b)
#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) #define extql(a, b) __asm__ ("extql %a0,%a1,%v0", a, b)
#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) #define extwl(a, b) __asm__ ("extwl %a0,%a1,%v0", a, b)
#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) #define extqh(a, b) __asm__ ("extqh %a0,%a1,%v0", a, b)
#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) #define zap(a, b) __asm__ ("zap %a0,%a1,%v0", a, b)
#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) #define zapnot(a, b) __asm__ ("zapnot %a0,%a1,%v0", a, b)
#define amask(a) asm ("amask %a0,%v0", a) #define amask(a) __asm__ ("amask %a0,%v0", a)
#define implver() asm ("implver %v0") #define implver() __asm__ ("implver %v0")
#define rpcc() asm ("rpcc %v0") #define rpcc() __asm__ ("rpcc %v0")
#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) #define minub8(a, b) __asm__ ("minub8 %a0,%a1,%v0", a, b)
#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) #define minsb8(a, b) __asm__ ("minsb8 %a0,%a1,%v0", a, b)
#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) #define minuw4(a, b) __asm__ ("minuw4 %a0,%a1,%v0", a, b)
#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) #define minsw4(a, b) __asm__ ("minsw4 %a0,%a1,%v0", a, b)
#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) #define maxub8(a, b) __asm__ ("maxub8 %a0,%a1,%v0", a, b)
#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) #define maxsb8(a, b) __asm__ ("maxsb8 %a0,%a1,%v0", a, b)
#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) #define maxuw4(a, b) __asm__ ("maxuw4 %a0,%a1,%v0", a, b)
#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) #define maxsw4(a, b) __asm__ ("maxsw4 %a0,%a1,%v0", a, b)
#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) #define perr(a, b) __asm__ ("perr %a0,%a1,%v0", a, b)
#define pklb(a) asm ("pklb %a0,%v0", a) #define pklb(a) __asm__ ("pklb %a0,%v0", a)
#define pkwb(a) asm ("pkwb %a0,%v0", a) #define pkwb(a) __asm__ ("pkwb %a0,%v0", a)
#define unpkbl(a) asm ("unpkbl %a0,%v0", a) #define unpkbl(a) __asm__ ("unpkbl %a0,%v0", a)
#define unpkbw(a) asm ("unpkbw %a0,%v0", a) #define unpkbw(a) __asm__ ("unpkbw %a0,%v0", a)
#define wh64(a) asm ("wh64 %a0", a) #define wh64(a) __asm__ ("wh64 %a0", a)
#else #else
#error "Unknown compiler!" #error "Unknown compiler!"
......
...@@ -66,7 +66,7 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) ...@@ -66,7 +66,7 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size) static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
{ {
asm volatile ( __asm__ volatile (
"mov r10, #8 \n\t" "mov r10, #8 \n\t"
"1: \n\t" "1: \n\t"
...@@ -206,7 +206,7 @@ static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) ...@@ -206,7 +206,7 @@ static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
#ifdef HAVE_ARMV5TE #ifdef HAVE_ARMV5TE
static void prefetch_arm(void *mem, int stride, int h) static void prefetch_arm(void *mem, int stride, int h)
{ {
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"subs %0, %0, #1 \n\t" "subs %0, %0, #1 \n\t"
"pld [%1] \n\t" "pld [%1] \n\t"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
#define SET_RND(regd) asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); #define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2b" #define WAVG2B "wavg2b"
#include "dsputil_iwmmxt_rnd.h" #include "dsputil_iwmmxt_rnd.h"
#undef DEF #undef DEF
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#undef WAVG2B #undef WAVG2B
#define DEF(x, y) x ## _ ## y ##_iwmmxt #define DEF(x, y) x ## _ ## y ##_iwmmxt
#define SET_RND(regd) asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); #define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2br" #define WAVG2B "wavg2br"
#include "dsputil_iwmmxt_rnd.h" #include "dsputil_iwmmxt_rnd.h"
#undef DEF #undef DEF
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
// need scheduling // need scheduling
#define OP(AVG) \ #define OP(AVG) \
asm volatile ( \ __asm__ volatile ( \
/* alignment */ \ /* alignment */ \
"and r12, %[pixels], #7 \n\t" \ "and r12, %[pixels], #7 \n\t" \
"bic %[pixels], %[pixels], #7 \n\t" \ "bic %[pixels], %[pixels], #7 \n\t" \
...@@ -89,7 +89,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s ...@@ -89,7 +89,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s
{ {
uint8_t *pixels2 = pixels + line_size; uint8_t *pixels2 = pixels + line_size;
asm volatile ( __asm__ volatile (
"mov r12, #4 \n\t" "mov r12, #4 \n\t"
"1: \n\t" "1: \n\t"
"pld [%[pixels], %[line_size2]] \n\t" "pld [%[pixels], %[line_size2]] \n\t"
...@@ -125,7 +125,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s ...@@ -125,7 +125,7 @@ void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_s
static void clear_blocks_iwmmxt(DCTELEM *blocks) static void clear_blocks_iwmmxt(DCTELEM *blocks)
{ {
asm volatile( __asm__ volatile(
"wzero wr0 \n\t" "wzero wr0 \n\t"
"mov r1, #(128 * 6 / 32) \n\t" "mov r1, #(128 * 6 / 32) \n\t"
"1: \n\t" "1: \n\t"
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{ {
int stride = line_size; int stride = line_size;
asm volatile ( __asm__ volatile (
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t" "tmcr wcgr1, r12 \n\t"
...@@ -60,7 +60,7 @@ void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz ...@@ -60,7 +60,7 @@ void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz
void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{ {
int stride = line_size; int stride = line_size;
asm volatile ( __asm__ volatile (
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t" "tmcr wcgr1, r12 \n\t"
...@@ -102,7 +102,7 @@ void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz ...@@ -102,7 +102,7 @@ void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_siz
void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{ {
int stride = line_size; int stride = line_size;
asm volatile ( __asm__ volatile (
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
"bic %[pixels], %[pixels], #7 \n\t" "bic %[pixels], %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t" "tmcr wcgr1, r12 \n\t"
...@@ -142,7 +142,7 @@ void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_si ...@@ -142,7 +142,7 @@ void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_si
void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{ {
int stride = line_size; int stride = line_size;
asm volatile ( __asm__ volatile (
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t" "pld [%[block]] \n\t"
...@@ -201,7 +201,7 @@ void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_ ...@@ -201,7 +201,7 @@ void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
...@@ -250,7 +250,7 @@ void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -250,7 +250,7 @@ void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
...@@ -311,7 +311,7 @@ void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_ ...@@ -311,7 +311,7 @@ void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t" "pld [%[block]] \n\t"
...@@ -372,7 +372,7 @@ void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -372,7 +372,7 @@ void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"pld [%[block]] \n\t" "pld [%[block]] \n\t"
...@@ -448,7 +448,7 @@ void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_ ...@@ -448,7 +448,7 @@ void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_
int stride = line_size; int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
...@@ -502,7 +502,7 @@ void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -502,7 +502,7 @@ void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line
int stride = line_size; int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
...@@ -559,7 +559,7 @@ void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -559,7 +559,7 @@ void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line
int stride = line_size; int stride = line_size;
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
"and r12, %[pixels], #7 \n\t" "and r12, %[pixels], #7 \n\t"
...@@ -627,7 +627,7 @@ void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -627,7 +627,7 @@ void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"mov r12, #2 \n\t" "mov r12, #2 \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
...@@ -721,7 +721,7 @@ void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin ...@@ -721,7 +721,7 @@ void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
"mov r12, #2 \n\t" "mov r12, #2 \n\t"
"pld [%[pixels], #32] \n\t" "pld [%[pixels], #32] \n\t"
...@@ -863,7 +863,7 @@ void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line ...@@ -863,7 +863,7 @@ void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[block]] \n\t" "pld [%[block]] \n\t"
"pld [%[block], #32] \n\t" "pld [%[block], #32] \n\t"
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
...@@ -967,7 +967,7 @@ void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin ...@@ -967,7 +967,7 @@ void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int lin
// [wr0 wr1 wr2 wr3] for previous line // [wr0 wr1 wr2 wr3] for previous line
// [wr4 wr5 wr6 wr7] for current line // [wr4 wr5 wr6 wr7] for current line
SET_RND(wr15); // =2 for rnd and =1 for no_rnd version SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"pld [%[block]] \n\t" "pld [%[block]] \n\t"
"pld [%[block], #32] \n\t" "pld [%[block], #32] \n\t"
"pld [%[pixels]] \n\t" "pld [%[pixels]] \n\t"
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
static void vector_fmul_vfp(float *dst, const float *src, int len) static void vector_fmul_vfp(float *dst, const float *src, int len)
{ {
int tmp; int tmp;
asm volatile( __asm__ volatile(
"fmrx %[tmp], fpscr\n\t" "fmrx %[tmp], fpscr\n\t"
"orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */ "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
"fmxr fpscr, %[tmp]\n\t" "fmxr fpscr, %[tmp]\n\t"
...@@ -90,7 +90,7 @@ static void vector_fmul_vfp(float *dst, const float *src, int len) ...@@ -90,7 +90,7 @@ static void vector_fmul_vfp(float *dst, const float *src, int len)
static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len) static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len)
{ {
src1 += len; src1 += len;
asm volatile( __asm__ volatile(
"fldmdbs %[src1]!, {s0-s3}\n\t" "fldmdbs %[src1]!, {s0-s3}\n\t"
"fldmias %[src0]!, {s8-s11}\n\t" "fldmias %[src0]!, {s8-s11}\n\t"
"fldmdbs %[src1]!, {s4-s7}\n\t" "fldmdbs %[src1]!, {s4-s7}\n\t"
...@@ -149,7 +149,7 @@ static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float * ...@@ -149,7 +149,7 @@ static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *
*/ */
void float_to_int16_vfp(int16_t *dst, const float *src, int len) void float_to_int16_vfp(int16_t *dst, const float *src, int len)
{ {
asm volatile( __asm__ volatile(
"fldmias %[src]!, {s16-s23}\n\t" "fldmias %[src]!, {s16-s23}\n\t"
"ftosis s0, s16\n\t" "ftosis s0, s16\n\t"
"ftosis s1, s17\n\t" "ftosis s1, s17\n\t"
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#ifdef FRAC_BITS #ifdef FRAC_BITS
# define MULL(a, b) \ # define MULL(a, b) \
({ int lo, hi;\ ({ int lo, hi;\
asm("smull %0, %1, %2, %3 \n\t"\ __asm__("smull %0, %1, %2, %3 \n\t"\
"mov %0, %0, lsr %4\n\t"\ "mov %0, %0, lsr %4\n\t"\
"add %1, %0, %1, lsl %5\n\t"\ "add %1, %0, %1, lsl %5\n\t"\
: "=&r"(lo), "=&r"(hi)\ : "=&r"(lo), "=&r"(hi)\
...@@ -37,21 +37,21 @@ ...@@ -37,21 +37,21 @@
static inline av_const int MULH(int a, int b) static inline av_const int MULH(int a, int b)
{ {
int r; int r;
asm ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
return r; return r;
} }
#define MULH MULH #define MULH MULH
#else #else
#define MULH(a, b) \ #define MULH(a, b) \
({ int lo, hi;\ ({ int lo, hi;\
asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\ __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
hi; }) hi; })
#endif #endif
static inline av_const int64_t MUL64(int a, int b) static inline av_const int64_t MUL64(int a, int b)
{ {
union { uint64_t x; unsigned hl[2]; } x; union { uint64_t x; unsigned hl[2]; } x;
asm ("smull %0, %1, %2, %3" __asm__ ("smull %0, %1, %2, %3"
: "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b)); : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
return x.x; return x.x;
} }
...@@ -60,7 +60,7 @@ static inline av_const int64_t MUL64(int a, int b) ...@@ -60,7 +60,7 @@ static inline av_const int64_t MUL64(int a, int b)
static inline av_const int64_t MAC64(int64_t d, int a, int b) static inline av_const int64_t MAC64(int64_t d, int a, int b)
{ {
union { uint64_t x; unsigned hl[2]; } x = { d }; union { uint64_t x; unsigned hl[2]; } x = { d };
asm ("smlal %0, %1, %2, %3" __asm__ ("smlal %0, %1, %2, %3"
: "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b)); : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
return x.x; return x.x;
} }
...@@ -71,11 +71,11 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b) ...@@ -71,11 +71,11 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b)
/* signed 16x16 -> 32 multiply add accumulate */ /* signed 16x16 -> 32 multiply add accumulate */
# define MAC16(rt, ra, rb) \ # define MAC16(rt, ra, rb) \
asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); __asm__ ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */ /* signed 16x16 -> 32 multiply */
# define MUL16(ra, rb) \ # define MUL16(ra, rb) \
({ int __rt; \ ({ int __rt; \
asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \ __asm__ ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
__rt; }) __rt; })
#endif #endif
......
...@@ -65,7 +65,7 @@ static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qa ...@@ -65,7 +65,7 @@ static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qa
({ DCTELEM *xblock = xxblock; \ ({ DCTELEM *xblock = xxblock; \
int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \ int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
int xdata1, xdata2; \ int xdata1, xdata2; \
asm volatile( \ __asm__ volatile( \
"subs %[count], %[count], #2 \n\t" \ "subs %[count], %[count], #2 \n\t" \
"ble 2f \n\t" \ "ble 2f \n\t" \
"ldrd r4, [%[block], #0] \n\t" \ "ldrd r4, [%[block], #0] \n\t" \
......
...@@ -48,7 +48,7 @@ static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s, ...@@ -48,7 +48,7 @@ static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
else else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
asm volatile ( __asm__ volatile (
/* "movd %1, %%mm6 \n\t" //qmul */ /* "movd %1, %%mm6 \n\t" //qmul */
/* "packssdw %%mm6, %%mm6 \n\t" */ /* "packssdw %%mm6, %%mm6 \n\t" */
/* "packssdw %%mm6, %%mm6 \n\t" */ /* "packssdw %%mm6, %%mm6 \n\t" */
......
...@@ -77,7 +77,7 @@ static void bfin_clear_blocks (DCTELEM *blocks) ...@@ -77,7 +77,7 @@ static void bfin_clear_blocks (DCTELEM *blocks)
{ {
// This is just a simple memset. // This is just a simple memset.
// //
asm("P0=192; " __asm__("P0=192; "
"I0=%0; " "I0=%0; "
"R0=0; " "R0=0; "
"LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;" "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifdef CONFIG_MPEGAUDIO_HP #ifdef CONFIG_MPEGAUDIO_HP
#define MULH(X,Y) ({ int xxo; \ #define MULH(X,Y) ({ int xxo; \
asm ( \ __asm__ ( \
"a1 = %2.L * %1.L (FU);\n\t" \ "a1 = %2.L * %1.L (FU);\n\t" \
"a1 = a1 >> 16;\n\t" \ "a1 = a1 >> 16;\n\t" \
"a1 += %2.H * %1.L (IS,M);\n\t" \ "a1 += %2.H * %1.L (IS,M);\n\t" \
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
: "=d" (xxo) : "d" (X), "d" (Y) : "A0","A1"); xxo; }) : "=d" (xxo) : "d" (X), "d" (Y) : "A0","A1"); xxo; })
#else #else
#define MULH(X,Y) ({ int xxo; \ #define MULH(X,Y) ({ int xxo; \
asm ( \ __asm__ ( \
"a1 = %2.H * %1.L (IS,M);\n\t" \ "a1 = %2.H * %1.L (IS,M);\n\t" \
"a0 = %1.H * %2.H, a1+= %1.H * %2.L (IS,M);\n\t"\ "a0 = %1.H * %2.H, a1+= %1.H * %2.L (IS,M);\n\t"\
"a1 = a1 >>> 16;\n\t" \ "a1 = a1 >>> 16;\n\t" \
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
/* signed 16x16 -> 32 multiply */ /* signed 16x16 -> 32 multiply */
#define MUL16(a, b) ({ int xxo; \ #define MUL16(a, b) ({ int xxo; \
asm ( \ __asm__ ( \
"%0 = %1.l*%2.l (is);\n\t" \ "%0 = %1.l*%2.l (is);\n\t" \
: "=W" (xxo) : "d" (a), "d" (b) : "A1"); \ : "=W" (xxo) : "d" (a), "d" (b) : "A1"); \
xxo; }) xxo; })
......
...@@ -88,7 +88,7 @@ static int dct_quantize_bfin (MpegEncContext *s, ...@@ -88,7 +88,7 @@ static int dct_quantize_bfin (MpegEncContext *s,
/* block[i] = level; */ /* block[i] = level; */
/* } */ /* } */
asm volatile __asm__ volatile
("i2=%1;\n\t" ("i2=%1;\n\t"
"r1=[%1++]; \n\t" "r1=[%1++]; \n\t"
"r0=r1>>>15 (v); \n\t" "r0=r1>>>15 (v); \n\t"
...@@ -114,7 +114,7 @@ static int dct_quantize_bfin (MpegEncContext *s, ...@@ -114,7 +114,7 @@ static int dct_quantize_bfin (MpegEncContext *s,
PROF("zzscan",5); PROF("zzscan",5);
asm volatile __asm__ volatile
("r0=b[%1--] (x); \n\t" ("r0=b[%1--] (x); \n\t"
"lsetup (0f,1f) lc0=%3; \n\t" /* for(i=63; i>=start_i; i--) { */ "lsetup (0f,1f) lc0=%3; \n\t" /* for(i=63; i>=start_i; i--) { */
"0: p0=r0; \n\t" /* j = scantable[i]; */ "0: p0=r0; \n\t" /* j = scantable[i]; */
......
...@@ -55,14 +55,14 @@ extern const uint8_t ff_reverse[256]; ...@@ -55,14 +55,14 @@ extern const uint8_t ff_reverse[256];
#if defined(ARCH_X86) #if defined(ARCH_X86)
// avoid +32 for shift optimization (gcc should do that ...) // avoid +32 for shift optimization (gcc should do that ...)
static inline int32_t NEG_SSR32( int32_t a, int8_t s){ static inline int32_t NEG_SSR32( int32_t a, int8_t s){
asm ("sarl %1, %0\n\t" __asm__ ("sarl %1, %0\n\t"
: "+r" (a) : "+r" (a)
: "ic" ((uint8_t)(-s)) : "ic" ((uint8_t)(-s))
); );
return a; return a;
} }
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
asm ("shrl %1, %0\n\t" __asm__ ("shrl %1, %0\n\t"
: "+r" (a) : "+r" (a)
: "ic" ((uint8_t)(-s)) : "ic" ((uint8_t)(-s))
); );
...@@ -248,7 +248,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) ...@@ -248,7 +248,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
{ {
# ifdef ALIGNED_BITSTREAM_WRITER # ifdef ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) # if defined(ARCH_X86)
asm volatile( __asm__ volatile(
"movl %0, %%ecx \n\t" "movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t" "xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t" "shrdl %%cl, %1, %%eax \n\t"
...@@ -279,7 +279,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) ...@@ -279,7 +279,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
# endif # endif
# else //ALIGNED_BITSTREAM_WRITER # else //ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86) # if defined(ARCH_X86)
asm volatile( __asm__ volatile(
"movl $7, %%ecx \n\t" "movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t" "andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t" "addl %3, %%ecx \n\t"
...@@ -556,7 +556,7 @@ static inline void skip_bits_long(GetBitContext *s, int n){ ...@@ -556,7 +556,7 @@ static inline void skip_bits_long(GetBitContext *s, int n){
#if defined(ARCH_X86) #if defined(ARCH_X86)
# define SKIP_CACHE(name, gb, num)\ # define SKIP_CACHE(name, gb, num)\
asm(\ __asm__(\
"shldl %2, %1, %0 \n\t"\ "shldl %2, %1, %0 \n\t"\
"shll %2, %1 \n\t"\ "shll %2, %1 \n\t"\
: "+r" (name##_cache0), "+r" (name##_cache1)\ : "+r" (name##_cache0), "+r" (name##_cache1)\
......
...@@ -304,7 +304,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ ...@@ -304,7 +304,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
int temp; int temp;
#if 0 #if 0
//P3:683 athlon:475 //P3:683 athlon:475
asm( __asm__(
"lea -0x100(%0), %2 \n\t" "lea -0x100(%0), %2 \n\t"
"shr $31, %2 \n\t" //FIXME 31->63 for x86-64 "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t" "shl %%cl, %0 \n\t"
...@@ -313,7 +313,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ ...@@ -313,7 +313,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
); );
#elif 0 #elif 0
//P3:680 athlon:474 //P3:680 athlon:474
asm( __asm__(
"cmp $0x100, %0 \n\t" "cmp $0x100, %0 \n\t"
"setb %%cl \n\t" //FIXME 31->63 for x86-64 "setb %%cl \n\t" //FIXME 31->63 for x86-64
"shl %%cl, %0 \n\t" "shl %%cl, %0 \n\t"
...@@ -323,7 +323,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ ...@@ -323,7 +323,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
#elif 1 #elif 1
int temp2; int temp2;
//P3:665 athlon:517 //P3:665 athlon:517
asm( __asm__(
"lea -0x100(%0), %%eax \n\t" "lea -0x100(%0), %%eax \n\t"
"cltd \n\t" "cltd \n\t"
"mov %0, %%eax \n\t" "mov %0, %%eax \n\t"
...@@ -336,7 +336,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ ...@@ -336,7 +336,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
#elif 0 #elif 0
int temp2; int temp2;
//P3:673 athlon:509 //P3:673 athlon:509
asm( __asm__(
"cmp $0x100, %0 \n\t" "cmp $0x100, %0 \n\t"
"sbb %%edx, %%edx \n\t" "sbb %%edx, %%edx \n\t"
"mov %0, %%eax \n\t" "mov %0, %%eax \n\t"
...@@ -349,7 +349,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){ ...@@ -349,7 +349,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
#else #else
int temp2; int temp2;
//P3:677 athlon:511 //P3:677 athlon:511
asm( __asm__(
"cmp $0x100, %0 \n\t" "cmp $0x100, %0 \n\t"
"lea (%0, %0), %%eax \n\t" "lea (%0, %0), %%eax \n\t"
"lea (%1, %1), %%edx \n\t" "lea (%1, %1), %%edx \n\t"
...@@ -385,7 +385,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st ...@@ -385,7 +385,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st
int bit; int bit;
#ifndef BRANCHLESS_CABAC_DECODER #ifndef BRANCHLESS_CABAC_DECODER
asm volatile( __asm__ volatile(
"movzbl (%1), %0 \n\t" "movzbl (%1), %0 \n\t"
"movl "RANGE "(%2), %%ebx \n\t" "movl "RANGE "(%2), %%ebx \n\t"
"movl "RANGE "(%2), %%edx \n\t" "movl "RANGE "(%2), %%edx \n\t"
...@@ -524,7 +524,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st ...@@ -524,7 +524,7 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st
"add "tmp" , "low" \n\t"\ "add "tmp" , "low" \n\t"\
"1: \n\t" "1: \n\t"
asm volatile( __asm__ volatile(
"movl "RANGE "(%2), %%esi \n\t" "movl "RANGE "(%2), %%esi \n\t"
"movl "LOW "(%2), %%ebx \n\t" "movl "LOW "(%2), %%ebx \n\t"
BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
...@@ -591,7 +591,7 @@ static int av_unused get_cabac(CABACContext *c, uint8_t * const state){ ...@@ -591,7 +591,7 @@ static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
static int av_unused get_cabac_bypass(CABACContext *c){ static int av_unused get_cabac_bypass(CABACContext *c){
#if 0 //not faster #if 0 //not faster
int bit; int bit;
asm volatile( __asm__ volatile(
"movl "RANGE "(%1), %%ebx \n\t" "movl "RANGE "(%1), %%ebx \n\t"
"movl "LOW "(%1), %%eax \n\t" "movl "LOW "(%1), %%eax \n\t"
"shl $17, %%ebx \n\t" "shl $17, %%ebx \n\t"
...@@ -638,7 +638,7 @@ static int av_unused get_cabac_bypass(CABACContext *c){ ...@@ -638,7 +638,7 @@ static int av_unused get_cabac_bypass(CABACContext *c){
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) #if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
asm volatile( __asm__ volatile(
"movl "RANGE "(%1), %%ebx \n\t" "movl "RANGE "(%1), %%ebx \n\t"
"movl "LOW "(%1), %%eax \n\t" "movl "LOW "(%1), %%eax \n\t"
"shl $17, %%ebx \n\t" "shl $17, %%ebx \n\t"
......
...@@ -177,7 +177,7 @@ static inline void mmx_emms(void) ...@@ -177,7 +177,7 @@ static inline void mmx_emms(void)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
if (cpu_flags & MM_MMX) if (cpu_flags & MM_MMX)
asm volatile ("emms\n\t"); __asm__ volatile ("emms\n\t");
#endif #endif
} }
......
...@@ -579,7 +579,7 @@ void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int li ...@@ -579,7 +579,7 @@ void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int li
static inline void emms(void) static inline void emms(void)
{ {
asm volatile ("emms;":::"memory"); __asm__ volatile ("emms;":::"memory");
} }
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
static inline void cavs_idct8_1d(int16_t *block, uint64_t bias) static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
{ {
asm volatile( __asm__ volatile(
"movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */ "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
"movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */ "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
"movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */ "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
...@@ -120,7 +120,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -120,7 +120,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
cavs_idct8_1d(block+4*i, ff_pw_4); cavs_idct8_1d(block+4*i, ff_pw_4);
asm volatile( __asm__ volatile(
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
"psraw $3, %%mm6 \n\t" "psraw $3, %%mm6 \n\t"
"psraw $3, %%mm5 \n\t" "psraw $3, %%mm5 \n\t"
...@@ -150,7 +150,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -150,7 +150,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
for(i=0; i<2; i++){ for(i=0; i<2; i++){
cavs_idct8_1d(b2+4*i, ff_pw_64); cavs_idct8_1d(b2+4*i, ff_pw_64);
asm volatile( __asm__ volatile(
"psraw $7, %%mm7 \n\t" "psraw $7, %%mm7 \n\t"
"psraw $7, %%mm6 \n\t" "psraw $7, %%mm6 \n\t"
"psraw $7, %%mm5 \n\t" "psraw $7, %%mm5 \n\t"
...@@ -175,7 +175,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -175,7 +175,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
add_pixels_clamped_mmx(b2, dst, stride); add_pixels_clamped_mmx(b2, dst, stride);
/* clear block */ /* clear block */
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movq %%mm7, (%0) \n\t" "movq %%mm7, (%0) \n\t"
"movq %%mm7, 8(%0) \n\t" "movq %%mm7, 8(%0) \n\t"
...@@ -275,7 +275,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -275,7 +275,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
src -= 2*srcStride;\ src -= 2*srcStride;\
\ \
while(w--){\ while(w--){\
asm volatile(\ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
"movd (%0), %%mm0 \n\t"\ "movd (%0), %%mm0 \n\t"\
"add %2, %0 \n\t"\ "add %2, %0 \n\t"\
...@@ -306,7 +306,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -306,7 +306,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
: "memory"\ : "memory"\
);\ );\
if(h==16){\ if(h==16){\
asm volatile(\ __asm__ volatile(\
VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
...@@ -328,7 +328,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -328,7 +328,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
#define QPEL_CAVS(OPNAME, OP, MMX)\ #define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
int h=8;\ int h=8;\
asm volatile(\ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
"movq %5, %%mm6 \n\t"\ "movq %5, %%mm6 \n\t"\
"1: \n\t"\ "1: \n\t"\
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index,eax,ebx,ecx,edx)\ #define cpuid(index,eax,ebx,ecx,edx)\
asm volatile\ __asm__ volatile\
("mov %%"REG_b", %%"REG_S"\n\t"\ ("mov %%"REG_b", %%"REG_S"\n\t"\
"cpuid\n\t"\ "cpuid\n\t"\
"xchg %%"REG_b", %%"REG_S\ "xchg %%"REG_b", %%"REG_S\
...@@ -44,7 +44,7 @@ int mm_support(void) ...@@ -44,7 +44,7 @@ int mm_support(void)
int max_std_level, max_ext_level, std_caps=0, ext_caps=0; int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
x86_reg a, c; x86_reg a, c;
asm volatile ( __asm__ volatile (
/* See if CPUID instruction is supported ... */ /* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */ /* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t" "pushf\n\t"
......
...@@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3; rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3;
asm volatile( __asm__ volatile(
"movd %0, %%mm5\n\t" "movd %0, %%mm5\n\t"
"movq %1, %%mm4\n\t" "movq %1, %%mm4\n\t"
"movq %2, %%mm6\n\t" /* mm6 = rnd */ "movq %2, %%mm6\n\t" /* mm6 = rnd */
...@@ -58,13 +58,13 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -58,13 +58,13 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
:: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg)); :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg));
for(i=0; i<h; i++) { for(i=0; i<h; i++) {
asm volatile( __asm__ volatile(
/* mm0 = src[0..7], mm1 = src[1..8] */ /* mm0 = src[0..7], mm1 = src[1..8] */
"movq %0, %%mm0\n\t" "movq %0, %%mm0\n\t"
"movq %1, %%mm2\n\t" "movq %1, %%mm2\n\t"
:: "m"(src[0]), "m"(src[dxy])); :: "m"(src[0]), "m"(src[dxy]));
asm volatile( __asm__ volatile(
/* [mm0,mm1] = A * src[0..7] */ /* [mm0,mm1] = A * src[0..7] */
/* [mm2,mm3] = B * src[1..8] */ /* [mm2,mm3] = B * src[1..8] */
"movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm1\n\t"
...@@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
/* general case, bilinear */ /* general case, bilinear */
rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a; rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a;
asm volatile("movd %2, %%mm4\n\t" __asm__ volatile("movd %2, %%mm4\n\t"
"movd %3, %%mm6\n\t" "movd %3, %%mm6\n\t"
"punpcklwd %%mm4, %%mm4\n\t" "punpcklwd %%mm4, %%mm4\n\t"
"punpcklwd %%mm6, %%mm6\n\t" "punpcklwd %%mm6, %%mm6\n\t"
...@@ -119,7 +119,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -119,7 +119,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"movq %%mm4, %0\n\t" "movq %%mm4, %0\n\t"
: "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
asm volatile( __asm__ volatile(
/* mm0 = src[0..7], mm1 = src[1..8] */ /* mm0 = src[0..7], mm1 = src[1..8] */
"movq %0, %%mm0\n\t" "movq %0, %%mm0\n\t"
"movq %1, %%mm1\n\t" "movq %1, %%mm1\n\t"
...@@ -128,7 +128,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -128,7 +128,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
for(i=0; i<h; i++) { for(i=0; i<h; i++) {
src += stride; src += stride;
asm volatile( __asm__ volatile(
/* mm2 = A * src[0..3] + B * src[1..4] */ /* mm2 = A * src[0..3] + B * src[1..4] */
/* mm3 = A * src[4..7] + B * src[5..8] */ /* mm3 = A * src[4..7] + B * src[5..8] */
"movq %%mm0, %%mm2\n\t" "movq %%mm0, %%mm2\n\t"
...@@ -145,7 +145,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -145,7 +145,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"paddw %%mm0, %%mm3\n\t" "paddw %%mm0, %%mm3\n\t"
: : "m" (AA)); : : "m" (AA));
asm volatile( __asm__ volatile(
/* [mm2,mm3] += C * src[0..7] */ /* [mm2,mm3] += C * src[0..7] */
"movq %0, %%mm0\n\t" "movq %0, %%mm0\n\t"
"movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm1\n\t"
...@@ -157,7 +157,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -157,7 +157,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"paddw %%mm1, %%mm3\n\t" "paddw %%mm1, %%mm3\n\t"
: : "m" (src[0])); : : "m" (src[0]));
asm volatile( __asm__ volatile(
/* [mm2,mm3] += D * src[1..8] */ /* [mm2,mm3] += D * src[1..8] */
"movq %1, %%mm1\n\t" "movq %1, %%mm1\n\t"
"movq %%mm1, %%mm0\n\t" "movq %%mm1, %%mm0\n\t"
...@@ -171,7 +171,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -171,7 +171,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"movq %0, %%mm0\n\t" "movq %0, %%mm0\n\t"
: : "m" (src[0]), "m" (src[1]), "m" (DD)); : : "m" (src[0]), "m" (src[1]), "m" (DD));
asm volatile( __asm__ volatile(
/* dst[0..7] = ([mm2,mm3] + 32) >> 6 */ /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
"paddw %1, %%mm2\n\t" "paddw %1, %%mm2\n\t"
"paddw %1, %%mm3\n\t" "paddw %1, %%mm3\n\t"
...@@ -187,7 +187,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -187,7 +187,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{ {
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movd %5, %%mm2 \n\t" "movd %5, %%mm2 \n\t"
"movd %6, %%mm3 \n\t" "movd %6, %%mm3 \n\t"
...@@ -259,7 +259,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* ...@@ -259,7 +259,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
int tmp = ((1<<16)-1)*x + 8; int tmp = ((1<<16)-1)*x + 8;
int CD= tmp*y; int CD= tmp*y;
int AB= (tmp<<3) - CD; int AB= (tmp<<3) - CD;
asm volatile( __asm__ volatile(
/* mm5 = {A,B,A,B} */ /* mm5 = {A,B,A,B} */
/* mm6 = {C,D,C,D} */ /* mm6 = {C,D,C,D} */
"movd %0, %%mm5\n\t" "movd %0, %%mm5\n\t"
...@@ -274,7 +274,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* ...@@ -274,7 +274,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
:: "r"(AB), "r"(CD), "m"(src[0])); :: "r"(AB), "r"(CD), "m"(src[0]));
asm volatile( __asm__ volatile(
"1:\n\t" "1:\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* mm1 = A * src[0,1] + B * src[1,2] */ /* mm1 = A * src[0,1] + B * src[1,2] */
......
...@@ -37,7 +37,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -37,7 +37,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
if(y==0 || x==0) if(y==0 || x==0)
{ {
/* 1 dimensional filter only */ /* 1 dimensional filter only */
asm volatile( __asm__ volatile(
"movd %0, %%xmm7 \n\t" "movd %0, %%xmm7 \n\t"
"movq %1, %%xmm6 \n\t" "movq %1, %%xmm6 \n\t"
"pshuflw $0, %%xmm7, %%xmm7 \n\t" "pshuflw $0, %%xmm7, %%xmm7 \n\t"
...@@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
); );
if(x) { if(x) {
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"movq (%1), %%xmm0 \n\t" "movq (%1), %%xmm0 \n\t"
"movq 1(%1), %%xmm1 \n\t" "movq 1(%1), %%xmm1 \n\t"
...@@ -75,7 +75,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -75,7 +75,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
:"r"((x86_reg)stride) :"r"((x86_reg)stride)
); );
} else { } else {
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"movq (%1), %%xmm0 \n\t" "movq (%1), %%xmm0 \n\t"
"movq (%1,%3), %%xmm1 \n\t" "movq (%1,%3), %%xmm1 \n\t"
...@@ -107,7 +107,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -107,7 +107,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
} }
/* general case, bilinear */ /* general case, bilinear */
asm volatile( __asm__ volatile(
"movd %0, %%xmm7 \n\t" "movd %0, %%xmm7 \n\t"
"movd %1, %%xmm6 \n\t" "movd %1, %%xmm6 \n\t"
"movdqa %2, %%xmm5 \n\t" "movdqa %2, %%xmm5 \n\t"
...@@ -118,7 +118,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -118,7 +118,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
:: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28)) :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
); );
asm volatile( __asm__ volatile(
"movq (%1), %%xmm0 \n\t" "movq (%1), %%xmm0 \n\t"
"movq 1(%1), %%xmm1 \n\t" "movq 1(%1), %%xmm1 \n\t"
"punpcklbw %%xmm1, %%xmm0 \n\t" "punpcklbw %%xmm1, %%xmm0 \n\t"
...@@ -160,7 +160,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -160,7 +160,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{ {
asm volatile( __asm__ volatile(
"movd %0, %%mm7 \n\t" "movd %0, %%mm7 \n\t"
"movd %1, %%mm6 \n\t" "movd %1, %%mm6 \n\t"
"movq %2, %%mm5 \n\t" "movq %2, %%mm5 \n\t"
...@@ -169,7 +169,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1* ...@@ -169,7 +169,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
:: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32) :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
); );
asm volatile( __asm__ volatile(
"movd (%1), %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
"punpcklbw 1(%1), %%mm0 \n\t" "punpcklbw 1(%1), %%mm0 \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
......
This diff is collapsed.
...@@ -127,7 +127,7 @@ extern const double ff_pd_2[2]; ...@@ -127,7 +127,7 @@ extern const double ff_pd_2[2];
#endif #endif
#define MOVQ_WONE(regd) \ #define MOVQ_WONE(regd) \
asm volatile ( \ __asm__ volatile ( \
"pcmpeqd %%" #regd ", %%" #regd " \n\t" \ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
"psrlw $15, %%" #regd ::) "psrlw $15, %%" #regd ::)
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
*/ */
static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -61,7 +61,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -61,7 +61,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movd (%1), %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
...@@ -112,7 +112,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -112,7 +112,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -162,7 +162,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -162,7 +162,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"pcmpeqb %%mm6, %%mm6 \n\t" "pcmpeqb %%mm6, %%mm6 \n\t"
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
...@@ -232,7 +232,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src ...@@ -232,7 +232,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src
static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movd (%1), %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
...@@ -284,7 +284,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -284,7 +284,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -339,7 +339,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -339,7 +339,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -379,7 +379,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -379,7 +379,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -427,7 +427,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -427,7 +427,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -481,7 +481,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -481,7 +481,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
asm volatile( __asm__ volatile(
"pcmpeqb %%mm6, %%mm6 \n\t" "pcmpeqb %%mm6, %%mm6 \n\t"
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
...@@ -556,7 +556,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr ...@@ -556,7 +556,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BONE(mm6); MOVQ_BONE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -592,7 +592,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in ...@@ -592,7 +592,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in
static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t" "sub %3, %2 \n\t"
...@@ -624,7 +624,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -624,7 +624,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BONE(mm6); MOVQ_BONE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t" "sub %3, %2 \n\t"
...@@ -656,7 +656,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in ...@@ -656,7 +656,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in
static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t" "1: \n\t"
"movq (%2), %%mm0 \n\t" "movq (%2), %%mm0 \n\t"
...@@ -684,7 +684,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -684,7 +684,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz
static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -716,7 +716,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -716,7 +716,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"sub %3, %2 \n\t" "sub %3, %2 \n\t"
...@@ -757,7 +757,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -757,7 +757,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BONE(mm6); MOVQ_BONE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
PAVGB" 1(%1), %%mm0 \n\t" PAVGB" 1(%1), %%mm0 \n\t"
...@@ -798,7 +798,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -798,7 +798,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
do { do {
asm volatile( __asm__ volatile(
"movd (%1), %%mm0 \n\t" "movd (%1), %%mm0 \n\t"
"movd (%1, %2), %%mm1 \n\t" "movd (%1, %2), %%mm1 \n\t"
"movd (%1, %2, 2), %%mm2 \n\t" "movd (%1, %2, 2), %%mm2 \n\t"
...@@ -852,7 +852,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -852,7 +852,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin
#define QPEL_2TAP_L3(OPNAME) \ #define QPEL_2TAP_L3(OPNAME) \
static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
asm volatile(\ __asm__ volatile(\
"1: \n\t"\ "1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\ "movq (%1,%2), %%mm0 \n\t"\
"movq 8(%1,%2), %%mm1 \n\t"\ "movq 8(%1,%2), %%mm1 \n\t"\
...@@ -874,7 +874,7 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride ...@@ -874,7 +874,7 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride
);\ );\
}\ }\
static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
asm volatile(\ __asm__ volatile(\
"1: \n\t"\ "1: \n\t"\
"movq (%1,%2), %%mm0 \n\t"\ "movq (%1,%2), %%mm0 \n\t"\
PAVGB" (%1,%3), %%mm0 \n\t"\ PAVGB" (%1,%3), %%mm0 \n\t"\
......
...@@ -36,7 +36,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[ ...@@ -36,7 +36,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
SET_RND(mm6); SET_RND(mm6);
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movd %4, %%mm5 \n\t" "movd %4, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t"
...@@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) ...@@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
if(FFABS(scale) < MAX_ABS){ if(FFABS(scale) < MAX_ABS){
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
SET_RND(mm6); SET_RND(mm6);
asm volatile( __asm__ volatile(
"movd %3, %%mm5 \n\t" "movd %3, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t"
"punpcklwd %%mm5, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t"
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3) ASMALIGN(3)
"1: \n\t" "1: \n\t"
...@@ -64,7 +64,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -64,7 +64,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -114,7 +114,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -114,7 +114,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
ASMALIGN(3) ASMALIGN(3)
"1: \n\t" "1: \n\t"
...@@ -160,7 +160,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -160,7 +160,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"testl $1, %0 \n\t" "testl $1, %0 \n\t"
" jz 1f \n\t" " jz 1f \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -209,7 +209,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -209,7 +209,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
ASMALIGN(3) ASMALIGN(3)
...@@ -239,7 +239,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -239,7 +239,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
{ {
MOVQ_ZERO(mm7); MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t" "movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
...@@ -307,7 +307,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i ...@@ -307,7 +307,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movd %0, %%mm0 \n\t" "movd %0, %%mm0 \n\t"
"movd %1, %%mm1 \n\t" "movd %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
...@@ -327,7 +327,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si ...@@ -327,7 +327,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %0, %%mm0 \n\t" "movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t" "movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
...@@ -346,7 +346,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s ...@@ -346,7 +346,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %0, %%mm0 \n\t" "movq %0, %%mm0 \n\t"
"movq %1, %%mm1 \n\t" "movq %1, %%mm1 \n\t"
PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
...@@ -369,7 +369,7 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -369,7 +369,7 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t" "movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t" "movq %0, %%mm3 \n\t"
...@@ -389,7 +389,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -389,7 +389,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t" "movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t" "movq %0, %%mm3 \n\t"
...@@ -410,7 +410,7 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -410,7 +410,7 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
"movq 1%1, %%mm1 \n\t" "movq 1%1, %%mm1 \n\t"
"movq %0, %%mm3 \n\t" "movq %0, %%mm3 \n\t"
...@@ -436,7 +436,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -436,7 +436,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
JUMPALIGN(); JUMPALIGN();
do { do {
asm volatile( __asm__ volatile(
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
"movq %2, %%mm1 \n\t" "movq %2, %%mm1 \n\t"
"movq %0, %%mm3 \n\t" "movq %0, %%mm3 \n\t"
...@@ -461,7 +461,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -461,7 +461,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
asm volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
ASMALIGN(3) ASMALIGN(3)
...@@ -502,7 +502,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -502,7 +502,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
{ {
MOVQ_ZERO(mm7); MOVQ_ZERO(mm7);
SET_RND(mm6); // =2 for rnd and =1 for no_rnd version SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
asm volatile( __asm__ volatile(
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm4 \n\t" "movq 1(%1), %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
{ {
asm volatile( __asm__ volatile(
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
ASMALIGN(4) ASMALIGN(4)
...@@ -58,7 +58,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) ...@@ -58,7 +58,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
{ {
asm volatile( __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t" "pxor %%xmm7, %%xmm7 \n\t"
"movq (%0), %%xmm0 \n\t" "movq (%0), %%xmm0 \n\t"
"movq (%0, %2), %%xmm1 \n\t" "movq (%0, %2), %%xmm1 \n\t"
...@@ -92,7 +92,7 @@ static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size ...@@ -92,7 +92,7 @@ static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size
static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
{ {
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"REG_a" \n\t"
ASMALIGN(4) ASMALIGN(4)
...@@ -124,7 +124,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ ...@@ -124,7 +124,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){
int sum; int sum;
x86_reg index= -line_size*h; x86_reg index= -line_size*h;
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
"1: \n\t" "1: \n\t"
...@@ -159,7 +159,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ ...@@ -159,7 +159,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){
static int pix_norm1_mmx(uint8_t *pix, int line_size) { static int pix_norm1_mmx(uint8_t *pix, int line_size) {
int tmp; int tmp;
asm volatile ( __asm__ volatile (
"movl $16,%%ecx\n" "movl $16,%%ecx\n"
"pxor %%mm0,%%mm0\n" "pxor %%mm0,%%mm0\n"
"pxor %%mm7,%%mm7\n" "pxor %%mm7,%%mm7\n"
...@@ -202,7 +202,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { ...@@ -202,7 +202,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp; int tmp;
asm volatile ( __asm__ volatile (
"movl %4,%%ecx\n" "movl %4,%%ecx\n"
"shr $1,%%ecx\n" "shr $1,%%ecx\n"
"pxor %%mm0,%%mm0\n" /* mm0 = 0 */ "pxor %%mm0,%%mm0\n" /* mm0 = 0 */
...@@ -263,7 +263,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -263,7 +263,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp; int tmp;
asm volatile ( __asm__ volatile (
"movl %4,%%ecx\n" "movl %4,%%ecx\n"
"pxor %%mm0,%%mm0\n" /* mm0 = 0 */ "pxor %%mm0,%%mm0\n" /* mm0 = 0 */
"pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */
...@@ -323,7 +323,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -323,7 +323,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp; int tmp;
asm volatile ( __asm__ volatile (
"shr $1,%2\n" "shr $1,%2\n"
"pxor %%xmm0,%%xmm0\n" /* mm0 = 0 */ "pxor %%xmm0,%%xmm0\n" /* mm0 = 0 */
"pxor %%xmm7,%%xmm7\n" /* mm7 holds the sum */ "pxor %%xmm7,%%xmm7\n" /* mm7 holds the sum */
...@@ -385,7 +385,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -385,7 +385,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
int tmp; int tmp;
asm volatile ( __asm__ volatile (
"movl %3,%%ecx\n" "movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n" "pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
...@@ -511,7 +511,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { ...@@ -511,7 +511,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) { static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
int tmp; int tmp;
uint8_t * pix= pix1; uint8_t * pix= pix1;
asm volatile ( __asm__ volatile (
"movl %3,%%ecx\n" "movl %3,%%ecx\n"
"pxor %%mm7,%%mm7\n" "pxor %%mm7,%%mm7\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
...@@ -673,7 +673,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si ...@@ -673,7 +673,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
"paddw " #in0 ", %%mm6\n" "paddw " #in0 ", %%mm6\n"
asm volatile ( __asm__ volatile (
"movl %3,%%ecx\n" "movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
"pxor %%mm7,%%mm7\n" "pxor %%mm7,%%mm7\n"
...@@ -719,7 +719,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s ...@@ -719,7 +719,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
"paddw " #in1 ", " #in0 "\n"\ "paddw " #in1 ", " #in0 "\n"\
"paddw " #in0 ", %%mm6\n" "paddw " #in0 ", %%mm6\n"
asm volatile ( __asm__ volatile (
"movl %3,%%ecx\n" "movl %3,%%ecx\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
"pxor %%mm7,%%mm7\n" "pxor %%mm7,%%mm7\n"
...@@ -782,7 +782,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -782,7 +782,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"paddw " #in0 ", %%mm6\n" "paddw " #in0 ", %%mm6\n"
asm volatile ( __asm__ volatile (
"movl %4,%%ecx\n" "movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
"pcmpeqw %%mm7,%%mm7\n" "pcmpeqw %%mm7,%%mm7\n"
...@@ -845,7 +845,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i ...@@ -845,7 +845,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
"paddw " #in1 ", " #in0 "\n"\ "paddw " #in1 ", " #in0 "\n"\
"paddw " #in0 ", %%mm6\n" "paddw " #in0 ", %%mm6\n"
asm volatile ( __asm__ volatile (
"movl %4,%%ecx\n" "movl %4,%%ecx\n"
"pxor %%mm6,%%mm6\n" "pxor %%mm6,%%mm6\n"
"pcmpeqw %%mm7,%%mm7\n" "pcmpeqw %%mm7,%%mm7\n"
...@@ -881,7 +881,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i ...@@ -881,7 +881,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
x86_reg i=0; x86_reg i=0;
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"movq (%2, %0), %%mm0 \n\t" "movq (%2, %0), %%mm0 \n\t"
"movq (%1, %0), %%mm1 \n\t" "movq (%1, %0), %%mm1 \n\t"
...@@ -905,7 +905,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -905,7 +905,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
x86_reg i=0; x86_reg i=0;
uint8_t l, lt; uint8_t l, lt;
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"movq -1(%1, %0), %%mm0 \n\t" // LT "movq -1(%1, %0), %%mm0 \n\t" // LT
"movq (%1, %0), %%mm1 \n\t" // T "movq (%1, %0), %%mm1 \n\t" // T
...@@ -946,7 +946,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -946,7 +946,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
#define DIFF_PIXELS_8(m0,m1,mm,p1,p2,stride,temp) {\ #define DIFF_PIXELS_8(m0,m1,mm,p1,p2,stride,temp) {\
uint8_t *p1b=p1, *p2b=p2;\ uint8_t *p1b=p1, *p2b=p2;\
asm volatile(\ __asm__ volatile(\
DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\ DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\
DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\ DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\
DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\ DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\
...@@ -1069,7 +1069,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid ...@@ -1069,7 +1069,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
\ \
DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\
\ \
asm volatile(\ __asm__ volatile(\
HADAMARD48\ HADAMARD48\
\ \
"movq %%mm7, 96(%1) \n\t"\ "movq %%mm7, 96(%1) \n\t"\
...@@ -1087,7 +1087,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid ...@@ -1087,7 +1087,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
\ \
DIFF_PIXELS_4x8(src1+4, src2+4, stride, temp[4]);\ DIFF_PIXELS_4x8(src1+4, src2+4, stride, temp[4]);\
\ \
asm volatile(\ __asm__ volatile(\
HADAMARD48\ HADAMARD48\
\ \
"movq %%mm7, 96(%1) \n\t"\ "movq %%mm7, 96(%1) \n\t"\
...@@ -1152,7 +1152,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid ...@@ -1152,7 +1152,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
\ \
DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\
\ \
asm volatile(\ __asm__ volatile(\
HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\
TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\
HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\
...@@ -1219,7 +1219,7 @@ HADAMARD8_DIFF_SSE2(ssse3) ...@@ -1219,7 +1219,7 @@ HADAMARD8_DIFF_SSE2(ssse3)
#define DCT_SAD_FUNC(cpu) \ #define DCT_SAD_FUNC(cpu) \
static int sum_abs_dctelem_##cpu(DCTELEM *block){\ static int sum_abs_dctelem_##cpu(DCTELEM *block){\
int sum;\ int sum;\
asm volatile(\ __asm__ volatile(\
DCT_SAD\ DCT_SAD\
:"=r"(sum)\ :"=r"(sum)\
:"r"(block)\ :"r"(block)\
...@@ -1256,7 +1256,7 @@ DCT_SAD_FUNC(ssse3) ...@@ -1256,7 +1256,7 @@ DCT_SAD_FUNC(ssse3)
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){
int sum; int sum;
x86_reg i=size; x86_reg i=size;
asm volatile( __asm__ volatile(
"pxor %%mm4, %%mm4 \n" "pxor %%mm4, %%mm4 \n"
"1: \n" "1: \n"
"sub $8, %0 \n" "sub $8, %0 \n"
......
...@@ -371,7 +371,7 @@ FDCT_COL(sse2, xmm, movdqa) ...@@ -371,7 +371,7 @@ FDCT_COL(sse2, xmm, movdqa)
static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{ {
asm volatile( __asm__ volatile(
#define FDCT_ROW_SSE2_H1(i,t) \ #define FDCT_ROW_SSE2_H1(i,t) \
"movq " #i "(%0), %%xmm2 \n\t" \ "movq " #i "(%0), %%xmm2 \n\t" \
"movq " #i "+8(%0), %%xmm0 \n\t" \ "movq " #i "+8(%0), %%xmm0 \n\t" \
......
...@@ -46,7 +46,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) ...@@ -46,7 +46,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
int n = 1<<s->nbits; int n = 1<<s->nbits;
int i; int i;
ff_fft_dispatch_interleave_3dn2(z, s->nbits); ff_fft_dispatch_interleave_3dn2(z, s->nbits);
asm volatile("femms"); __asm__ volatile("femms");
if(n <= 8) if(n <= 8)
for(i=0; i<n; i+=2) for(i=0; i<n; i+=2)
FFSWAP(FFTSample, z[i].im, z[i+1].re); FFSWAP(FFTSample, z[i].im, z[i+1].re);
...@@ -69,11 +69,11 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -69,11 +69,11 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
in1 = input; in1 = input;
in2 = input + n2 - 1; in2 = input + n2 - 1;
#ifdef EMULATE_3DNOWEXT #ifdef EMULATE_3DNOWEXT
asm volatile("movd %0, %%mm7" ::"r"(1<<31)); __asm__ volatile("movd %0, %%mm7" ::"r"(1<<31));
#endif #endif
for(k = 0; k < n4; k++) { for(k = 0; k < n4; k++) {
// FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it
asm volatile( __asm__ volatile(
"movd %0, %%mm0 \n" "movd %0, %%mm0 \n"
"movd %2, %%mm1 \n" "movd %2, %%mm1 \n"
"punpckldq %1, %%mm0 \n" "punpckldq %1, %%mm0 \n"
...@@ -94,7 +94,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -94,7 +94,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
::"m"(in2[-2*k]), "m"(in1[2*k]), ::"m"(in2[-2*k]), "m"(in1[2*k]),
"m"(tcos[k]), "m"(tsin[k]) "m"(tcos[k]), "m"(tsin[k])
); );
asm volatile( __asm__ volatile(
"movq %%mm0, %0 \n\t" "movq %%mm0, %0 \n\t"
:"=m"(z[revtab[k]]) :"=m"(z[revtab[k]])
); );
...@@ -117,7 +117,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -117,7 +117,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
/* post rotation */ /* post rotation */
j = -n2; j = -n2;
k = n2-8; k = n2-8;
asm volatile( __asm__ volatile(
"1: \n" "1: \n"
CMUL(%0, %%mm0, %%mm1) CMUL(%0, %%mm0, %%mm1)
CMUL(%1, %%mm2, %%mm3) CMUL(%1, %%mm2, %%mm3)
...@@ -140,7 +140,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -140,7 +140,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
:"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8)
:"memory" :"memory"
); );
asm volatile("femms"); __asm__ volatile("femms");
} }
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input) void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input)
...@@ -153,7 +153,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -153,7 +153,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
j = -n; j = -n;
k = n-8; k = n-8;
asm volatile( __asm__ volatile(
"movq %4, %%mm7 \n" "movq %4, %%mm7 \n"
"1: \n" "1: \n"
PSWAPD((%2,%1), %%mm0) PSWAPD((%2,%1), %%mm0)
...@@ -168,6 +168,6 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ...@@ -168,6 +168,6 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu
:"r"(output+n4), "r"(output+n4*3), :"r"(output+n4), "r"(output+n4*3),
"m"(*m1m1) "m"(*m1m1)
); );
asm volatile("femms"); __asm__ volatile("femms");
} }
...@@ -36,7 +36,7 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) ...@@ -36,7 +36,7 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
if(n <= 16) { if(n <= 16) {
x86_reg i = -8*n; x86_reg i = -8*n;
asm volatile( __asm__ volatile(
"1: \n" "1: \n"
"movaps (%0,%1), %%xmm0 \n" "movaps (%0,%1), %%xmm0 \n"
"movaps %%xmm0, %%xmm1 \n" "movaps %%xmm0, %%xmm1 \n"
...@@ -58,7 +58,7 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z) ...@@ -58,7 +58,7 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z)
int n = 1 << s->nbits; int n = 1 << s->nbits;
int i; int i;
for(i=0; i<n; i+=2) { for(i=0; i<n; i+=2) {
asm volatile( __asm__ volatile(
"movaps %2, %%xmm0 \n" "movaps %2, %%xmm0 \n"
"movlps %%xmm0, %0 \n" "movlps %%xmm0, %0 \n"
"movhps %%xmm0, %1 \n" "movhps %%xmm0, %1 \n"
...@@ -84,7 +84,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input ...@@ -84,7 +84,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
/* pre rotation */ /* pre rotation */
for(k=n8-2; k>=0; k-=2) { for(k=n8-2; k>=0; k-=2) {
asm volatile( __asm__ volatile(
"movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im } "movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im }
"movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } "movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
"movaps %%xmm0, %%xmm2 \n" "movaps %%xmm0, %%xmm2 \n"
...@@ -111,7 +111,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input ...@@ -111,7 +111,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
// if we have enough regs, don't let gcc make the luts latency-bound // if we have enough regs, don't let gcc make the luts latency-bound
// but if not, latency is faster than spilling // but if not, latency is faster than spilling
asm("movlps %%xmm0, %0 \n" __asm__("movlps %%xmm0, %0 \n"
"movhps %%xmm0, %1 \n" "movhps %%xmm0, %1 \n"
"movlps %%xmm1, %2 \n" "movlps %%xmm1, %2 \n"
"movhps %%xmm1, %3 \n" "movhps %%xmm1, %3 \n"
...@@ -121,10 +121,10 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input ...@@ -121,10 +121,10 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
"=m"(z[revtab[ k+1]]) "=m"(z[revtab[ k+1]])
); );
#else #else
asm("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
asm("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
asm("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
asm("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
#endif #endif
} }
...@@ -146,7 +146,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input ...@@ -146,7 +146,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
j = -n2; j = -n2;
k = n2-16; k = n2-16;
asm volatile( __asm__ volatile(
"1: \n" "1: \n"
CMUL(%0, %%xmm0, %%xmm1) CMUL(%0, %%xmm0, %%xmm1)
CMUL(%1, %%xmm4, %%xmm5) CMUL(%1, %%xmm4, %%xmm5)
...@@ -181,7 +181,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input ...@@ -181,7 +181,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input
j = -n; j = -n;
k = n-16; k = n-16;
asm volatile( __asm__ volatile(
"movaps %4, %%xmm7 \n" "movaps %4, %%xmm7 \n"
"1: \n" "1: \n"
"movaps (%2,%1), %%xmm0 \n" "movaps (%2,%1), %%xmm0 \n"
......
...@@ -28,7 +28,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data ...@@ -28,7 +28,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data
int n2 = len>>1; int n2 = len>>1;
x86_reg i = -n2*sizeof(int32_t); x86_reg i = -n2*sizeof(int32_t);
x86_reg j = n2*sizeof(int32_t); x86_reg j = n2*sizeof(int32_t);
asm volatile( __asm__ volatile(
"movsd %0, %%xmm7 \n\t" "movsd %0, %%xmm7 \n\t"
"movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t" "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"
"movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t" "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"
...@@ -38,7 +38,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data ...@@ -38,7 +38,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data
::"m"(c) ::"m"(c)
); );
#define WELCH(MOVPD, offset)\ #define WELCH(MOVPD, offset)\
asm volatile(\ __asm__ volatile(\
"1: \n\t"\ "1: \n\t"\
"movapd %%xmm7, %%xmm1 \n\t"\ "movapd %%xmm7, %%xmm1 \n\t"\
"mulpd %%xmm1, %%xmm1 \n\t"\ "mulpd %%xmm1, %%xmm1 \n\t"\
...@@ -84,7 +84,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, ...@@ -84,7 +84,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
for(j=0; j<lag; j+=2){ for(j=0; j<lag; j+=2){
x86_reg i = -len*sizeof(double); x86_reg i = -len*sizeof(double);
if(j == lag-2) { if(j == lag-2) {
asm volatile( __asm__ volatile(
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
...@@ -113,7 +113,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, ...@@ -113,7 +113,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
:"r"(data1+len), "r"(data1+len-j) :"r"(data1+len), "r"(data1+len-j)
); );
} else { } else {
asm volatile( __asm__ volatile(
"movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
"movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
"1: \n\t" "1: \n\t"
......
...@@ -43,7 +43,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -43,7 +43,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
int minusstart= -(int)significant_coeff_ctx_base; int minusstart= -(int)significant_coeff_ctx_base;
int minusindex= 4-(int)index; int minusindex= 4-(int)index;
int coeff_count; int coeff_count;
asm volatile( __asm__ volatile(
"movl "RANGE "(%3), %%esi \n\t" "movl "RANGE "(%3), %%esi \n\t"
"movl "LOW "(%3), %%ebx \n\t" "movl "LOW "(%3), %%ebx \n\t"
...@@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
int minusindex= 4-(int)index; int minusindex= 4-(int)index;
int coeff_count; int coeff_count;
x86_reg last=0; x86_reg last=0;
asm volatile( __asm__ volatile(
"movl "RANGE "(%3), %%esi \n\t" "movl "RANGE "(%3), %%esi \n\t"
"movl "LOW "(%3), %%ebx \n\t" "movl "LOW "(%3), %%ebx \n\t"
......
...@@ -483,7 +483,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = { ...@@ -483,7 +483,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = {
void ff_idct_xvid_mmx(short *block){ void ff_idct_xvid_mmx(short *block){
asm volatile( __asm__ volatile(
//# Process each row //# Process each row
DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
...@@ -506,7 +506,7 @@ asm volatile( ...@@ -506,7 +506,7 @@ asm volatile(
void ff_idct_xvid_mmx2(short *block){ void ff_idct_xvid_mmx2(short *block){
asm volatile( __asm__ volatile(
//# Process each row //# Process each row
DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
......
...@@ -341,7 +341,7 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = { ...@@ -341,7 +341,7 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = {
inline void ff_idct_xvid_sse2(short *block) inline void ff_idct_xvid_sse2(short *block)
{ {
asm volatile( __asm__ volatile(
"movq "MANGLE(m127)", %%mm0 \n\t" "movq "MANGLE(m127)", %%mm0 \n\t"
iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0)) iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))
iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1)) iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#ifdef FRAC_BITS #ifdef FRAC_BITS
# define MULL(ra, rb) \ # define MULL(ra, rb) \
({ int rt, dummy; asm (\ ({ int rt, dummy; __asm__ (\
"imull %3 \n\t"\ "imull %3 \n\t"\
"shrdl %4, %%edx, %%eax \n\t"\ "shrdl %4, %%edx, %%eax \n\t"\
: "=a"(rt), "=d"(dummy)\ : "=a"(rt), "=d"(dummy)\
...@@ -34,12 +34,12 @@ ...@@ -34,12 +34,12 @@
#define MULH(ra, rb) \ #define MULH(ra, rb) \
({ int rt, dummy;\ ({ int rt, dummy;\
asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\ __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
rt; }) rt; })
#define MUL64(ra, rb) \ #define MUL64(ra, rb) \
({ int64_t rt;\ ({ int64_t rt;\
asm ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\ __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
rt; }) rt; })
#endif /* AVCODEC_I386_MATHOPS_H */ #endif /* AVCODEC_I386_MATHOPS_H */
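To exercise the converted multiply helpers from the mathops header in isolation, a minimal harness around MULH and MUL64 as they appear above could look like the sketch below. It assumes a 32-bit x86 build (the "=A" constraint names the edx:eax register pair) and a GCC-compatible compiler; the operand values and main() are illustrative only.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* high 32 bits of the signed 64-bit product, as in the diff above */
    #define MULH(ra, rb) \
        ({ int rt, dummy;\
         __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
         rt; })

    /* full signed 64-bit product, returned in edx:eax */
    #define MUL64(ra, rb) \
        ({ int64_t rt;\
         __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
         rt; })

    int main(void)
    {
        int a = 100000, b = 300000;                   /* the product needs 35 bits */
        printf("MULH  = %d\n", MULH(a, b));           /* expect 6 */
        printf("MUL64 = %"PRId64"\n", MUL64(a, b));   /* expect 30000000000 */
        return 0;
    }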
...@@ -43,25 +43,25 @@ typedef union { ...@@ -43,25 +43,25 @@ typedef union {
#define mmx_i2r(op,imm,reg) \ #define mmx_i2r(op,imm,reg) \
asm volatile (#op " %0, %%" #reg \ __asm__ volatile (#op " %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "i" (imm) ) : "i" (imm) )
#define mmx_m2r(op,mem,reg) \ #define mmx_m2r(op,mem,reg) \
asm volatile (#op " %0, %%" #reg \ __asm__ volatile (#op " %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "m" (mem)) : "m" (mem))
#define mmx_r2m(op,reg,mem) \ #define mmx_r2m(op,reg,mem) \
asm volatile (#op " %%" #reg ", %0" \ __asm__ volatile (#op " %%" #reg ", %0" \
: "=m" (mem) \ : "=m" (mem) \
: /* nothing */ ) : /* nothing */ )
#define mmx_r2r(op,regs,regd) \ #define mmx_r2r(op,regs,regd) \
asm volatile (#op " %" #regs ", %" #regd) __asm__ volatile (#op " %" #regs ", %" #regd)
#define emms() asm volatile ("emms") #define emms() __asm__ volatile ("emms")
#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) #define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) #define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
...@@ -200,16 +200,16 @@ typedef union { ...@@ -200,16 +200,16 @@ typedef union {
#define mmx_m2ri(op,mem,reg,imm) \ #define mmx_m2ri(op,mem,reg,imm) \
asm volatile (#op " %1, %0, %%" #reg \ __asm__ volatile (#op " %1, %0, %%" #reg \
: /* nothing */ \ : /* nothing */ \
: "m" (mem), "i" (imm)) : "m" (mem), "i" (imm))
#define mmx_r2ri(op,regs,regd,imm) \ #define mmx_r2ri(op,regs,regd,imm) \
asm volatile (#op " %0, %%" #regs ", %%" #regd \ __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
: /* nothing */ \ : /* nothing */ \
: "i" (imm) ) : "i" (imm) )
#define mmx_fetch(mem,hint) \ #define mmx_fetch(mem,hint) \
asm volatile ("prefetch" #hint " %0" \ __asm__ volatile ("prefetch" #hint " %0" \
: /* nothing */ \ : /* nothing */ \
: "m" (mem)) : "m" (mem))
...@@ -240,7 +240,7 @@ typedef union { ...@@ -240,7 +240,7 @@ typedef union {
#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
#define pmovmskb(mmreg,reg) \ #define pmovmskb(mmreg,reg) \
asm volatile ("movmskps %" #mmreg ", %" #reg) __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
...@@ -256,7 +256,7 @@ typedef union { ...@@ -256,7 +256,7 @@ typedef union {
#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
#define sfence() asm volatile ("sfence\n\t") #define sfence() __asm__ volatile ("sfence\n\t")
/* SSE2 */ /* SSE2 */
#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
......
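The register-transfer macros in this header are meant to be composed into short MMX sequences; as a usage sketch, the snippet below round-trips a 32-bit value through %mm0 using movd_m2r/movd_r2m, built on the mmx_m2r/mmx_r2m wrappers shown above. It assumes an x86 target with MMX and a GCC-compatible compiler; the variable names and main() are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    /* wrapper macros as converted above */
    #define mmx_m2r(op,mem,reg) \
        __asm__ volatile (#op " %0, %%" #reg \
                          : /* nothing */ \
                          : "m" (mem))

    #define mmx_r2m(op,reg,mem) \
        __asm__ volatile (#op " %%" #reg ", %0" \
                          : "=m" (mem) \
                          : /* nothing */ )

    #define emms() __asm__ volatile ("emms")

    #define movd_m2r(var,reg)  mmx_m2r (movd, var, reg)
    #define movd_r2m(reg,var)  mmx_r2m (movd, reg, var)

    int main(void)
    {
        uint32_t in = 0xdeadbeef, out = 0;
        movd_m2r(in, mm0);                 /* load 32 bits from memory into %mm0 */
        movd_r2m(mm0, out);                /* store %mm0 back to memory */
        emms();                            /* leave MMX state before using printf */
        printf("0x%08x\n", (unsigned)out); /* expect 0xdeadbeef */
        return 0;
    }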
...@@ -36,7 +36,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; ...@@ -36,7 +36,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
x86_reg len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
...@@ -71,7 +71,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -71,7 +71,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
asm volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -92,7 +92,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -92,7 +92,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
{ {
int ret; int ret;
asm volatile( __asm__ volatile(
"pxor %%xmm6, %%xmm6 \n\t" "pxor %%xmm6, %%xmm6 \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
...@@ -109,7 +109,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ...@@ -109,7 +109,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((x86_reg)stride) : "r" ((x86_reg)stride)
); );
asm volatile( __asm__ volatile(
"movhlps %%xmm6, %%xmm0 \n\t" "movhlps %%xmm6, %%xmm0 \n\t"
"paddw %%xmm0, %%xmm6 \n\t" "paddw %%xmm0, %%xmm6 \n\t"
"movd %%xmm6, %0 \n\t" "movd %%xmm6, %0 \n\t"
...@@ -120,7 +120,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ...@@ -120,7 +120,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
asm volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -142,7 +142,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ...@@ -142,7 +142,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
asm volatile( __asm__ volatile(
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
ASMALIGN(4) ASMALIGN(4)
...@@ -167,7 +167,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ...@@ -167,7 +167,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
asm volatile( __asm__ volatile(
"movq "MANGLE(bone)", %%mm5 \n\t" "movq "MANGLE(bone)", %%mm5 \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"pavgb 1(%1), %%mm0 \n\t" "pavgb 1(%1), %%mm0 \n\t"
...@@ -198,7 +198,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -198,7 +198,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{ {
x86_reg len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
...@@ -236,7 +236,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int ...@@ -236,7 +236,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
x86_reg len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( __asm__ volatile(
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
...@@ -289,7 +289,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -289,7 +289,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static inline int sum_mmx(void) static inline int sum_mmx(void)
{ {
int ret; int ret;
asm volatile( __asm__ volatile(
"movq %%mm6, %%mm0 \n\t" "movq %%mm6, %%mm0 \n\t"
"psrlq $32, %%mm6 \n\t" "psrlq $32, %%mm6 \n\t"
"paddw %%mm0, %%mm6 \n\t" "paddw %%mm0, %%mm6 \n\t"
...@@ -305,7 +305,7 @@ static inline int sum_mmx(void) ...@@ -305,7 +305,7 @@ static inline int sum_mmx(void)
static inline int sum_mmx2(void) static inline int sum_mmx2(void)
{ {
int ret; int ret;
asm volatile( __asm__ volatile(
"movd %%mm6, %0 \n\t" "movd %%mm6, %0 \n\t"
: "=r" (ret) : "=r" (ret)
); );
...@@ -326,7 +326,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -326,7 +326,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
assert(h==8);\ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\ "pxor %%mm6, %%mm6 \n\t":);\
\ \
sad8_1_ ## suf(blk1, blk2, stride, 8);\ sad8_1_ ## suf(blk1, blk2, stride, 8);\
...@@ -336,7 +336,7 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h ...@@ -336,7 +336,7 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
assert(h==8);\ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\ "movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \ :: "m"(round_tab[1]) \
...@@ -350,7 +350,7 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in ...@@ -350,7 +350,7 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
assert(h==8);\ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\ "movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \ :: "m"(round_tab[1]) \
...@@ -364,7 +364,7 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in ...@@ -364,7 +364,7 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
assert(h==8);\ assert(h==8);\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
::);\ ::);\
\ \
...@@ -375,7 +375,7 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i ...@@ -375,7 +375,7 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
\ \
static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\ "pxor %%mm6, %%mm6 \n\t":);\
\ \
sad8_1_ ## suf(blk1 , blk2 , stride, h);\ sad8_1_ ## suf(blk1 , blk2 , stride, h);\
...@@ -385,7 +385,7 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int ...@@ -385,7 +385,7 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int
}\ }\
static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\ "movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \ :: "m"(round_tab[1]) \
...@@ -398,7 +398,7 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i ...@@ -398,7 +398,7 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\ }\
static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\ "movq %0, %%mm5 \n\t"\
:: "m"(round_tab[1]) \ :: "m"(round_tab[1]) \
...@@ -411,7 +411,7 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i ...@@ -411,7 +411,7 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
}\ }\
static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\ {\
asm volatile("pxor %%mm7, %%mm7 \n\t"\ __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
::);\ ::);\
\ \
......
...@@ -55,7 +55,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, ...@@ -55,7 +55,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
else else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd); //printf("%d %d ", qmul, qadd);
asm volatile( __asm__ volatile(
"movd %1, %%mm6 \n\t" //qmul "movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
...@@ -118,7 +118,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, ...@@ -118,7 +118,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd); //printf("%d %d ", qmul, qadd);
asm volatile( __asm__ volatile(
"movd %1, %%mm6 \n\t" //qmul "movd %1, %%mm6 \n\t" //qmul
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
...@@ -214,7 +214,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, ...@@ -214,7 +214,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
block0 = block[0] * s->c_dc_scale; block0 = block[0] * s->c_dc_scale;
/* XXX: only mpeg1 */ /* XXX: only mpeg1 */
quant_matrix = s->intra_matrix; quant_matrix = s->intra_matrix;
asm volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
...@@ -277,7 +277,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, ...@@ -277,7 +277,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
asm volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
...@@ -349,7 +349,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, ...@@ -349,7 +349,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
else else
block0 = block[0] * s->c_dc_scale; block0 = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix; quant_matrix = s->intra_matrix;
asm volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $15, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t"
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
...@@ -410,7 +410,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, ...@@ -410,7 +410,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
asm volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlq $48, %%mm7 \n\t" "psrlq $48, %%mm7 \n\t"
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
...@@ -482,7 +482,7 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ ...@@ -482,7 +482,7 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
s->dct_count[intra]++; s->dct_count[intra]++;
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"1: \n\t" "1: \n\t"
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
...@@ -536,7 +536,7 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ ...@@ -536,7 +536,7 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
s->dct_count[intra]++; s->dct_count[intra]++;
asm volatile( __asm__ volatile(
"pxor %%xmm7, %%xmm7 \n\t" "pxor %%xmm7, %%xmm7 \n\t"
"1: \n\t" "1: \n\t"
"pxor %%xmm0, %%xmm0 \n\t" "pxor %%xmm0, %%xmm0 \n\t"
......
...@@ -117,13 +117,13 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -117,13 +117,13 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
/* note: block[0] is assumed to be positive */ /* note: block[0] is assumed to be positive */
if (!s->h263_aic) { if (!s->h263_aic) {
#if 1 #if 1
asm volatile ( __asm__ volatile (
"mul %%ecx \n\t" "mul %%ecx \n\t"
: "=d" (level), "=a"(dummy) : "=d" (level), "=a"(dummy)
: "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1]) : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])
); );
#else #else
asm volatile ( __asm__ volatile (
"xorl %%edx, %%edx \n\t" "xorl %%edx, %%edx \n\t"
"divw %%cx \n\t" "divw %%cx \n\t"
"movzwl %%ax, %%eax \n\t" "movzwl %%ax, %%eax \n\t"
...@@ -149,7 +149,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -149,7 +149,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
asm volatile( __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"7, "MM"7 \n\t" // 0
...@@ -182,7 +182,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -182,7 +182,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"r" (inv_zigzag_direct16+64), "r" (temp_block+64) "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
); );
}else{ // FMT_H263 }else{ // FMT_H263
asm volatile( __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"7, "MM"7 \n\t" // 0
...@@ -214,7 +214,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -214,7 +214,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
"r" (inv_zigzag_direct16+64), "r" (temp_block+64) "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
); );
} }
asm volatile( __asm__ volatile(
"movd %1, "MM"1 \n\t" // max_qcoeff "movd %1, "MM"1 \n\t" // max_qcoeff
SPREADW(MM"1") SPREADW(MM"1")
"psubusw "MM"1, "MM"4 \n\t" "psubusw "MM"1, "MM"4 \n\t"
......
...@@ -212,7 +212,7 @@ static inline void idct(int16_t *block) ...@@ -212,7 +212,7 @@ static inline void idct(int16_t *block)
DECLARE_ALIGNED(8, int64_t, align_tmp[16]); DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
int16_t * const temp= (int16_t*)align_tmp; int16_t * const temp= (int16_t*)align_tmp;
asm volatile( __asm__ volatile(
#if 0 //Alternative, simpler variant #if 0 //Alternative, simpler variant
#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
......
...@@ -38,7 +38,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -38,7 +38,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
// calculate b[0] correctly afterwards. // calculate b[0] correctly afterwards.
i = 0; i = 0;
asm volatile( __asm__ volatile(
"pcmpeqd %%xmm7, %%xmm7 \n\t" "pcmpeqd %%xmm7, %%xmm7 \n\t"
"pcmpeqd %%xmm3, %%xmm3 \n\t" "pcmpeqd %%xmm3, %%xmm3 \n\t"
"psllw $1, %%xmm3 \n\t" "psllw $1, %%xmm3 \n\t"
...@@ -46,7 +46,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -46,7 +46,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
"psllw $13, %%xmm3 \n\t" "psllw $13, %%xmm3 \n\t"
::); ::);
for(; i<w_l-15; i+=16){ for(; i<w_l-15; i+=16){
asm volatile( __asm__ volatile(
"movdqu (%1), %%xmm1 \n\t" "movdqu (%1), %%xmm1 \n\t"
"movdqu 16(%1), %%xmm5 \n\t" "movdqu 16(%1), %%xmm5 \n\t"
"movdqu 2(%1), %%xmm2 \n\t" "movdqu 2(%1), %%xmm2 \n\t"
...@@ -77,7 +77,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -77,7 +77,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
dst[i] = dst[i] - (b[i] + b[i + 1]); dst[i] = dst[i] - (b[i] + b[i + 1]);
} }
for(; i<w_r-15; i+=16){ for(; i<w_r-15; i+=16){
asm volatile( __asm__ volatile(
"movdqu (%1), %%xmm1 \n\t" "movdqu (%1), %%xmm1 \n\t"
"movdqu 16(%1), %%xmm5 \n\t" "movdqu 16(%1), %%xmm5 \n\t"
"movdqu 2(%1), %%xmm2 \n\t" "movdqu 2(%1), %%xmm2 \n\t"
...@@ -102,14 +102,14 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -102,14 +102,14 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
IDWTELEM b_0 = b[0]; IDWTELEM b_0 = b[0];
i = 0; i = 0;
asm volatile( __asm__ volatile(
"psllw $15, %%xmm7 \n\t" "psllw $15, %%xmm7 \n\t"
"pcmpeqw %%xmm6, %%xmm6 \n\t" "pcmpeqw %%xmm6, %%xmm6 \n\t"
"psrlw $13, %%xmm6 \n\t" "psrlw $13, %%xmm6 \n\t"
"paddw %%xmm7, %%xmm6 \n\t" "paddw %%xmm7, %%xmm6 \n\t"
::); ::);
for(; i<w_l-15; i+=16){ for(; i<w_l-15; i+=16){
asm volatile( __asm__ volatile(
"movdqu (%1), %%xmm0 \n\t" "movdqu (%1), %%xmm0 \n\t"
"movdqu 16(%1), %%xmm4 \n\t" "movdqu 16(%1), %%xmm4 \n\t"
"movdqu 2(%1), %%xmm1 \n\t" "movdqu 2(%1), %%xmm1 \n\t"
...@@ -150,7 +150,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -150,7 +150,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS); temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
} }
for(; i<w_r-7; i+=8){ for(; i<w_r-7; i+=8){
asm volatile( __asm__ volatile(
"movdqu 2(%1), %%xmm2 \n\t" "movdqu 2(%1), %%xmm2 \n\t"
"movdqu 18(%1), %%xmm6 \n\t" "movdqu 18(%1), %%xmm6 \n\t"
"paddw (%1), %%xmm2 \n\t" "paddw (%1), %%xmm2 \n\t"
...@@ -180,7 +180,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -180,7 +180,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
b[i] = b[i>>1]; b[i] = b[i>>1];
} }
for (i-=62; i>=0; i-=64){ for (i-=62; i>=0; i-=64){
asm volatile( __asm__ volatile(
"movdqa (%1), %%xmm0 \n\t" "movdqa (%1), %%xmm0 \n\t"
"movdqa 16(%1), %%xmm2 \n\t" "movdqa 16(%1), %%xmm2 \n\t"
"movdqa 32(%1), %%xmm4 \n\t" "movdqa 32(%1), %%xmm4 \n\t"
...@@ -224,7 +224,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -224,7 +224,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
i = 1; i = 1;
b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
asm volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"pcmpeqw %%mm3, %%mm3 \n\t" "pcmpeqw %%mm3, %%mm3 \n\t"
"psllw $1, %%mm3 \n\t" "psllw $1, %%mm3 \n\t"
...@@ -232,7 +232,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -232,7 +232,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
"psllw $13, %%mm3 \n\t" "psllw $13, %%mm3 \n\t"
::); ::);
for(; i<w_l-7; i+=8){ for(; i<w_l-7; i+=8){
asm volatile( __asm__ volatile(
"movq (%1), %%mm2 \n\t" "movq (%1), %%mm2 \n\t"
"movq 8(%1), %%mm6 \n\t" "movq 8(%1), %%mm6 \n\t"
"paddw 2(%1), %%mm2 \n\t" "paddw 2(%1), %%mm2 \n\t"
...@@ -257,7 +257,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -257,7 +257,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
i = 0; i = 0;
for(; i<w_r-7; i+=8){ for(; i<w_r-7; i+=8){
asm volatile( __asm__ volatile(
"movq (%1), %%mm2 \n\t" "movq (%1), %%mm2 \n\t"
"movq 8(%1), %%mm6 \n\t" "movq 8(%1), %%mm6 \n\t"
"paddw 2(%1), %%mm2 \n\t" "paddw 2(%1), %%mm2 \n\t"
...@@ -280,14 +280,14 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -280,14 +280,14 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
i = 1; i = 1;
b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS); b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
asm volatile( __asm__ volatile(
"psllw $15, %%mm7 \n\t" "psllw $15, %%mm7 \n\t"
"pcmpeqw %%mm6, %%mm6 \n\t" "pcmpeqw %%mm6, %%mm6 \n\t"
"psrlw $13, %%mm6 \n\t" "psrlw $13, %%mm6 \n\t"
"paddw %%mm7, %%mm6 \n\t" "paddw %%mm7, %%mm6 \n\t"
::); ::);
for(; i<w_l-7; i+=8){ for(; i<w_l-7; i+=8){
asm volatile( __asm__ volatile(
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm4 \n\t" "movq 8(%1), %%mm4 \n\t"
"movq 2(%1), %%mm1 \n\t" "movq 2(%1), %%mm1 \n\t"
...@@ -324,7 +324,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -324,7 +324,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
i = 0; i = 0;
for(; i<w_r-7; i+=8){ for(; i<w_r-7; i+=8){
asm volatile( __asm__ volatile(
"movq 2(%1), %%mm2 \n\t" "movq 2(%1), %%mm2 \n\t"
"movq 10(%1), %%mm6 \n\t" "movq 10(%1), %%mm6 \n\t"
"paddw (%1), %%mm2 \n\t" "paddw (%1), %%mm2 \n\t"
...@@ -354,7 +354,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -354,7 +354,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
b[i] = b[i>>1]; b[i] = b[i>>1];
} }
for (i-=30; i>=0; i-=32){ for (i-=30; i>=0; i-=32){
asm volatile( __asm__ volatile(
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm2 \n\t" "movq 8(%1), %%mm2 \n\t"
"movq 16(%1), %%mm4 \n\t" "movq 16(%1), %%mm4 \n\t"
...@@ -448,7 +448,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ...@@ -448,7 +448,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
} }
i+=i; i+=i;
asm volatile ( __asm__ volatile (
"jmp 2f \n\t" "jmp 2f \n\t"
"1: \n\t" "1: \n\t"
snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6") snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
...@@ -544,7 +544,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I ...@@ -544,7 +544,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
} }
i+=i; i+=i;
asm volatile( __asm__ volatile(
"jmp 2f \n\t" "jmp 2f \n\t"
"1: \n\t" "1: \n\t"
...@@ -606,7 +606,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I ...@@ -606,7 +606,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
#define snow_inner_add_yblock_sse2_header \ #define snow_inner_add_yblock_sse2_header \
IDWTELEM * * dst_array = sb->line + src_y;\ IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\ x86_reg tmp;\
asm volatile(\ __asm__ volatile(\
"mov %7, %%"REG_c" \n\t"\ "mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\ "mov %6, %2 \n\t"\
"mov %4, %%"REG_S" \n\t"\ "mov %4, %%"REG_S" \n\t"\
...@@ -759,7 +759,7 @@ snow_inner_add_yblock_sse2_end_16 ...@@ -759,7 +759,7 @@ snow_inner_add_yblock_sse2_end_16
#define snow_inner_add_yblock_mmx_header \ #define snow_inner_add_yblock_mmx_header \
IDWTELEM * * dst_array = sb->line + src_y;\ IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\ x86_reg tmp;\
asm volatile(\ __asm__ volatile(\
"mov %7, %%"REG_c" \n\t"\ "mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\ "mov %6, %2 \n\t"\
"mov %4, %%"REG_S" \n\t"\ "mov %4, %%"REG_S" \n\t"\
......
...@@ -74,7 +74,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, ...@@ -74,7 +74,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
const uint8_t *src, x86_reg stride, const uint8_t *src, x86_reg stride,
int rnd, int64_t shift) int rnd, int64_t shift)
{ {
asm volatile( __asm__ volatile(
"mov $3, %%"REG_c" \n\t" "mov $3, %%"REG_c" \n\t"
LOAD_ROUNDER_MMX("%5") LOAD_ROUNDER_MMX("%5")
"movq "MANGLE(ff_pw_9)", %%mm6 \n\t" "movq "MANGLE(ff_pw_9)", %%mm6 \n\t"
...@@ -114,7 +114,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, ...@@ -114,7 +114,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
src -= 1; src -= 1;
rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */ rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */
asm volatile( __asm__ volatile(
LOAD_ROUNDER_MMX("%4") LOAD_ROUNDER_MMX("%4")
"movq "MANGLE(ff_pw_128)", %%mm6\n\t" "movq "MANGLE(ff_pw_128)", %%mm6\n\t"
"movq "MANGLE(ff_pw_9)", %%mm5 \n\t" "movq "MANGLE(ff_pw_9)", %%mm5 \n\t"
...@@ -155,7 +155,7 @@ static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, ...@@ -155,7 +155,7 @@ static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src,
x86_reg stride, int rnd, x86_reg offset) x86_reg stride, int rnd, x86_reg offset)
{ {
rnd = 8-rnd; rnd = 8-rnd;
asm volatile( __asm__ volatile(
"mov $8, %%"REG_c" \n\t" "mov $8, %%"REG_c" \n\t"
LOAD_ROUNDER_MMX("%5") LOAD_ROUNDER_MMX("%5")
"movq "MANGLE(ff_pw_9)", %%mm6\n\t" "movq "MANGLE(ff_pw_9)", %%mm6\n\t"
...@@ -264,7 +264,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ ...@@ -264,7 +264,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
{ \ { \
int h = 8; \ int h = 8; \
src -= src_stride; \ src -= src_stride; \
asm volatile( \ __asm__ volatile( \
LOAD_ROUNDER_MMX("%5") \ LOAD_ROUNDER_MMX("%5") \
"movq "MANGLE(ff_pw_53)", %%mm5\n\t" \ "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \
"movq "MANGLE(ff_pw_18)", %%mm6\n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \
...@@ -320,7 +320,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ ...@@ -320,7 +320,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
int h = 8; \ int h = 8; \
src -= 1; \ src -= 1; \
rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \ rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
asm volatile( \ __asm__ volatile( \
LOAD_ROUNDER_MMX("%4") \ LOAD_ROUNDER_MMX("%4") \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
...@@ -358,7 +358,7 @@ vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ ...@@ -358,7 +358,7 @@ vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
int h = 8; \ int h = 8; \
src -= offset; \ src -= offset; \
rnd = 32-rnd; \ rnd = 32-rnd; \
asm volatile ( \ __asm__ volatile ( \
LOAD_ROUNDER_MMX("%6") \ LOAD_ROUNDER_MMX("%6") \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
...@@ -412,7 +412,7 @@ static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, ...@@ -412,7 +412,7 @@ static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,
static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =
{ NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx }; { NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx };
asm volatile( __asm__ volatile(
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
::: "memory" ::: "memory"
); );
......
...@@ -250,7 +250,7 @@ void ff_vp3_idct_mmx(int16_t *output_data) ...@@ -250,7 +250,7 @@ void ff_vp3_idct_mmx(int16_t *output_data)
#define I(x) AV_STRINGIFY(16* x )"(%0)" #define I(x) AV_STRINGIFY(16* x )"(%0)"
#define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)" #define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)"
asm volatile ( __asm__ volatile (
RowIDCT() RowIDCT()
Transpose() Transpose()
......
...@@ -161,7 +161,7 @@ void ff_vp3_idct_sse2(int16_t *input_data) ...@@ -161,7 +161,7 @@ void ff_vp3_idct_sse2(int16_t *input_data)
#define O(x) I(x) #define O(x) I(x)
#define C(x) AV_STRINGIFY(16*(x-1))"(%1)" #define C(x) AV_STRINGIFY(16*(x-1))"(%1)"
asm volatile ( __asm__ volatile (
VP3_1D_IDCT_SSE2(NOP, NOP) VP3_1D_IDCT_SSE2(NOP, NOP)
TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0)) TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0))
......
...@@ -654,7 +654,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, ...@@ -654,7 +654,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
fact they decided to store the quantized DC (which would lead fact they decided to store the quantized DC (which would lead
to problems if Q could vary !) */ to problems if Q could vary !) */
#if (defined(ARCH_X86)) && !defined PIC #if (defined(ARCH_X86)) && !defined PIC
asm volatile( __asm__ volatile(
"movl %3, %%eax \n\t" "movl %3, %%eax \n\t"
"shrl $1, %%eax \n\t" "shrl $1, %%eax \n\t"
"addl %%eax, %2 \n\t" "addl %%eax, %2 \n\t"
......
...@@ -66,7 +66,7 @@ int has_altivec(void) ...@@ -66,7 +66,7 @@ int has_altivec(void)
#elif defined(RUNTIME_CPUDETECT) #elif defined(RUNTIME_CPUDETECT)
int proc_ver; int proc_ver;
// Support of mfspr PVR emulation added in Linux 2.6.17. // Support of mfspr PVR emulation added in Linux 2.6.17.
asm volatile("mfspr %0, 287" : "=r" (proc_ver)); __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
proc_ver >>= 16; proc_ver >>= 16;
if (proc_ver & 0x8000 || if (proc_ver & 0x8000 ||
proc_ver == 0x000c || proc_ver == 0x000c ||
......
...@@ -148,7 +148,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1); ...@@ -148,7 +148,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
i += 16; i += 16;
} }
for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) { for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
} }
if (misal) { if (misal) {
((unsigned long*)blocks)[188] = 0L; ((unsigned long*)blocks)[188] = 0L;
...@@ -181,7 +181,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1); ...@@ -181,7 +181,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
} }
else else
for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) { for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory"); __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
} }
#else #else
memset(blocks, 0, sizeof(DCTELEM)*6*64); memset(blocks, 0, sizeof(DCTELEM)*6*64);
...@@ -219,7 +219,7 @@ long check_dcbzl_effect(void) ...@@ -219,7 +219,7 @@ long check_dcbzl_effect(void)
/* below the constraint "b" seems to mean "Address base register" /* below the constraint "b" seems to mean "Address base register"
in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */ in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
for (i = 0; i < 1024 ; i ++) { for (i = 0; i < 1024 ; i ++) {
if (fakedata[i] == (char)0) if (fakedata[i] == (char)0)
...@@ -241,7 +241,7 @@ static void prefetch_ppc(void *mem, int stride, int h) ...@@ -241,7 +241,7 @@ static void prefetch_ppc(void *mem, int stride, int h)
{ {
register const uint8_t *p = mem; register const uint8_t *p = mem;
do { do {
asm volatile ("dcbt 0,%0" : : "r" (p)); __asm__ volatile ("dcbt 0,%0" : : "r" (p));
p+= stride; p+= stride;
} while(--h); } while(--h);
} }
......
...@@ -70,36 +70,36 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][ ...@@ -70,36 +70,36 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
#ifndef HAVE_PPC64 #ifndef HAVE_PPC64
#define POWERP_PMC_DATATYPE unsigned long #define POWERP_PMC_DATATYPE unsigned long
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a)) #define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 937" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a)) #define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 938" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2) #if (POWERPC_NUM_PMC_ENABLED > 2)
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a)) #define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 941" : "=r" (a))
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a)) #define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 942" : "=r" (a))
#else #else
#define POWERPC_GET_PMC3(a) do {} while (0) #define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0) #define POWERPC_GET_PMC4(a) do {} while (0)
#endif #endif
#if (POWERPC_NUM_PMC_ENABLED > 4) #if (POWERPC_NUM_PMC_ENABLED > 4)
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a)) #define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 929" : "=r" (a))
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a)) #define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 930" : "=r" (a))
#else #else
#define POWERPC_GET_PMC5(a) do {} while (0) #define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0) #define POWERPC_GET_PMC6(a) do {} while (0)
#endif #endif
#else /* HAVE_PPC64 */ #else /* HAVE_PPC64 */
#define POWERP_PMC_DATATYPE unsigned long long #define POWERP_PMC_DATATYPE unsigned long long
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a)) #define POWERPC_GET_PMC1(a) __asm__ volatile("mfspr %0, 771" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a)) #define POWERPC_GET_PMC2(a) __asm__ volatile("mfspr %0, 772" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2) #if (POWERPC_NUM_PMC_ENABLED > 2)
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a)) #define POWERPC_GET_PMC3(a) __asm__ volatile("mfspr %0, 773" : "=r" (a))
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a)) #define POWERPC_GET_PMC4(a) __asm__ volatile("mfspr %0, 774" : "=r" (a))
#else #else
#define POWERPC_GET_PMC3(a) do {} while (0) #define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0) #define POWERPC_GET_PMC4(a) do {} while (0)
#endif #endif
#if (POWERPC_NUM_PMC_ENABLED > 4) #if (POWERPC_NUM_PMC_ENABLED > 4)
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a)) #define POWERPC_GET_PMC5(a) __asm__ volatile("mfspr %0, 775" : "=r" (a))
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a)) #define POWERPC_GET_PMC6(a) __asm__ volatile("mfspr %0, 776" : "=r" (a))
#else #else
#define POWERPC_GET_PMC5(a) do {} while (0) #define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0) #define POWERPC_GET_PMC6(a) do {} while (0)
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#if (__GNUC__ < 4) #if (__GNUC__ < 4)
# define REG_v(a) # define REG_v(a)
#else #else
# define REG_v(a) asm ( #a ) # define REG_v(a) __asm__ ( #a )
#endif #endif
#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
......
...@@ -26,12 +26,12 @@ ...@@ -26,12 +26,12 @@
#if defined(ARCH_POWERPC_405) #if defined(ARCH_POWERPC_405)
/* signed 16x16 -> 32 multiply add accumulate */ /* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \ #define MAC16(rt, ra, rb) \
asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */ /* signed 16x16 -> 32 multiply */
#define MUL16(ra, rb) \ #define MUL16(ra, rb) \
({ int __rt; \ ({ int __rt; \
asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \ __asm__ ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
__rt; }) __rt; })
#endif #endif
......
...@@ -31,7 +31,7 @@ void ff_mmi_idct(DCTELEM *block); ...@@ -31,7 +31,7 @@ void ff_mmi_idct(DCTELEM *block);
static void clear_blocks_mmi(DCTELEM * blocks) static void clear_blocks_mmi(DCTELEM * blocks)
{ {
asm volatile( __asm__ volatile(
".set noreorder \n" ".set noreorder \n"
"addiu $9, %0, 768 \n" "addiu $9, %0, 768 \n"
"nop \n" "nop \n"
...@@ -51,7 +51,7 @@ static void clear_blocks_mmi(DCTELEM * blocks) ...@@ -51,7 +51,7 @@ static void clear_blocks_mmi(DCTELEM * blocks)
static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
{ {
asm volatile( __asm__ volatile(
".set push \n\t" ".set push \n\t"
".set mips3 \n\t" ".set mips3 \n\t"
"ld $8, 0(%0) \n\t" "ld $8, 0(%0) \n\t"
...@@ -92,7 +92,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size) ...@@ -92,7 +92,7 @@ static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile( __asm__ volatile(
".set push \n\t" ".set push \n\t"
".set mips3 \n\t" ".set mips3 \n\t"
"1: \n\t" "1: \n\t"
...@@ -111,7 +111,7 @@ static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size ...@@ -111,7 +111,7 @@ static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h) static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{ {
asm volatile ( __asm__ volatile (
".set push \n\t" ".set push \n\t"
".set mips3 \n\t" ".set mips3 \n\t"
"1: \n\t" "1: \n\t"
......
...@@ -257,7 +257,7 @@ static short consttable[] align16 = { ...@@ -257,7 +257,7 @@ static short consttable[] align16 = {
pmaxh($2, $0, $2); \ pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \ ppacb($0, $2, $2); \
sd3(2, 0, 4); \ sd3(2, 0, 4); \
asm volatile ("add $4, $5, $4"); __asm__ volatile ("add $4, $5, $4");
#define DCT_8_INV_COL8_PUT() \ #define DCT_8_INV_COL8_PUT() \
PUT($16); \ PUT($16); \
...@@ -277,7 +277,7 @@ static short consttable[] align16 = { ...@@ -277,7 +277,7 @@ static short consttable[] align16 = {
pmaxh($2, $0, $2); \ pmaxh($2, $0, $2); \
ppacb($0, $2, $2); \ ppacb($0, $2, $2); \
sd3(2, 0, 4); \ sd3(2, 0, 4); \
asm volatile ("add $4, $5, $4"); __asm__ volatile ("add $4, $5, $4");
/*fixme: schedule*/ /*fixme: schedule*/
#define DCT_8_INV_COL8_ADD() \ #define DCT_8_INV_COL8_ADD() \
...@@ -294,7 +294,7 @@ static short consttable[] align16 = { ...@@ -294,7 +294,7 @@ static short consttable[] align16 = {
void ff_mmi_idct(int16_t * block) void ff_mmi_idct(int16_t * block)
{ {
/* $4 = block */ /* $4 = block */
asm volatile("la $24, %0"::"m"(consttable[0])); __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8); lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7); lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8); DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
...@@ -309,14 +309,14 @@ void ff_mmi_idct(int16_t * block) ...@@ -309,14 +309,14 @@ void ff_mmi_idct(int16_t * block)
DCT_8_INV_COL8_STORE($4); DCT_8_INV_COL8_STORE($4);
//let savedtemp regs be saved //let savedtemp regs be saved
asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
} }
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{ {
/* $4 = dest, $5 = line_size, $6 = block */ /* $4 = dest, $5 = line_size, $6 = block */
asm volatile("la $24, %0"::"m"(consttable[0])); __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8); lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7); lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
...@@ -333,14 +333,14 @@ void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block) ...@@ -333,14 +333,14 @@ void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
DCT_8_INV_COL8_PUT(); DCT_8_INV_COL8_PUT();
//let savedtemp regs be saved //let savedtemp regs be saved
asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
} }
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{ {
/* $4 = dest, $5 = line_size, $6 = block */ /* $4 = dest, $5 = line_size, $6 = block */
asm volatile("la $24, %0"::"m"(consttable[0])); __asm__ volatile("la $24, %0"::"m"(consttable[0]));
lq($24, ROUNDER_0, $8); lq($24, ROUNDER_0, $8);
lq($24, ROUNDER_1, $7); lq($24, ROUNDER_1, $7);
DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8); DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
...@@ -357,6 +357,6 @@ void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block) ...@@ -357,6 +357,6 @@ void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
DCT_8_INV_COL8_ADD(); DCT_8_INV_COL8_ADD();
//let savedtemp regs be saved //let savedtemp regs be saved
asm volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23"); __asm__ volatile(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
} }
...@@ -60,112 +60,112 @@ ...@@ -60,112 +60,112 @@
#define lq(base, off, reg) \ #define lq(base, off, reg) \
asm volatile ("lq " #reg ", %0("#base ")" : : "i" (off) ) __asm__ volatile ("lq " #reg ", %0("#base ")" : : "i" (off) )
#define lq2(mem, reg) \ #define lq2(mem, reg) \
asm volatile ("lq " #reg ", %0" : : "r" (mem)) __asm__ volatile ("lq " #reg ", %0" : : "r" (mem))
#define sq(reg, off, base) \ #define sq(reg, off, base) \
asm volatile ("sq " #reg ", %0("#base ")" : : "i" (off) ) __asm__ volatile ("sq " #reg ", %0("#base ")" : : "i" (off) )
/* /*
#define ld(base, off, reg) \ #define ld(base, off, reg) \
asm volatile ("ld " #reg ", " #off "("#base ")") __asm__ volatile ("ld " #reg ", " #off "("#base ")")
*/ */
#define ld3(base, off, reg) \ #define ld3(base, off, reg) \
asm volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off))) __asm__ volatile (".word %0" : : "i" ( 0xdc000000 | (base<<21) | (reg<<16) | (off)))
#define ldr3(base, off, reg) \ #define ldr3(base, off, reg) \
asm volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off))) __asm__ volatile (".word %0" : : "i" ( 0x6c000000 | (base<<21) | (reg<<16) | (off)))
#define ldl3(base, off, reg) \ #define ldl3(base, off, reg) \
asm volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off))) __asm__ volatile (".word %0" : : "i" ( 0x68000000 | (base<<21) | (reg<<16) | (off)))
/* /*
#define sd(reg, off, base) \ #define sd(reg, off, base) \
asm volatile ("sd " #reg ", " #off "("#base ")") __asm__ volatile ("sd " #reg ", " #off "("#base ")")
*/ */
//seems assembler has bug encoding mnemonic 'sd', so DIY //seems assembler has bug encoding mnemonic 'sd', so DIY
#define sd3(reg, off, base) \ #define sd3(reg, off, base) \
asm volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off))) __asm__ volatile (".word %0" : : "i" ( 0xfc000000 | (base<<21) | (reg<<16) | (off)))
#define sw(reg, off, base) \ #define sw(reg, off, base) \
asm volatile ("sw " #reg ", " #off "("#base ")") __asm__ volatile ("sw " #reg ", " #off "("#base ")")
#define sq2(reg, mem) \ #define sq2(reg, mem) \
asm volatile ("sq " #reg ", %0" : : "m" (*(mem))) __asm__ volatile ("sq " #reg ", %0" : : "m" (*(mem)))
#define pinth(rs, rt, rd) \ #define pinth(rs, rt, rd) \
asm volatile ("pinth " #rd ", " #rs ", " #rt ) __asm__ volatile ("pinth " #rd ", " #rs ", " #rt )
#define phmadh(rs, rt, rd) \ #define phmadh(rs, rt, rd) \
asm volatile ("phmadh " #rd ", " #rs ", " #rt ) __asm__ volatile ("phmadh " #rd ", " #rs ", " #rt )
#define pcpyud(rs, rt, rd) \ #define pcpyud(rs, rt, rd) \
asm volatile ("pcpyud " #rd ", " #rs ", " #rt ) __asm__ volatile ("pcpyud " #rd ", " #rs ", " #rt )
#define pcpyld(rs, rt, rd) \ #define pcpyld(rs, rt, rd) \
asm volatile ("pcpyld " #rd ", " #rs ", " #rt ) __asm__ volatile ("pcpyld " #rd ", " #rs ", " #rt )
#define pcpyh(rt, rd) \ #define pcpyh(rt, rd) \
asm volatile ("pcpyh " #rd ", " #rt ) __asm__ volatile ("pcpyh " #rd ", " #rt )
#define paddw(rs, rt, rd) \ #define paddw(rs, rt, rd) \
asm volatile ("paddw " #rd ", " #rs ", " #rt ) __asm__ volatile ("paddw " #rd ", " #rs ", " #rt )
#define pextlw(rs, rt, rd) \ #define pextlw(rs, rt, rd) \
asm volatile ("pextlw " #rd ", " #rs ", " #rt ) __asm__ volatile ("pextlw " #rd ", " #rs ", " #rt )
#define pextuw(rs, rt, rd) \ #define pextuw(rs, rt, rd) \
asm volatile ("pextuw " #rd ", " #rs ", " #rt ) __asm__ volatile ("pextuw " #rd ", " #rs ", " #rt )
#define pextlh(rs, rt, rd) \ #define pextlh(rs, rt, rd) \
asm volatile ("pextlh " #rd ", " #rs ", " #rt ) __asm__ volatile ("pextlh " #rd ", " #rs ", " #rt )
#define pextuh(rs, rt, rd) \ #define pextuh(rs, rt, rd) \
asm volatile ("pextuh " #rd ", " #rs ", " #rt ) __asm__ volatile ("pextuh " #rd ", " #rs ", " #rt )
#define psubw(rs, rt, rd) \ #define psubw(rs, rt, rd) \
asm volatile ("psubw " #rd ", " #rs ", " #rt ) __asm__ volatile ("psubw " #rd ", " #rs ", " #rt )
#define psraw(rt, sa, rd) \ #define psraw(rt, sa, rd) \
asm volatile ("psraw " #rd ", " #rt ", %0" : : "i"(sa) ) __asm__ volatile ("psraw " #rd ", " #rt ", %0" : : "i"(sa) )
#define ppach(rs, rt, rd) \ #define ppach(rs, rt, rd) \
asm volatile ("ppach " #rd ", " #rs ", " #rt ) __asm__ volatile ("ppach " #rd ", " #rs ", " #rt )
#define ppacb(rs, rt, rd) \ #define ppacb(rs, rt, rd) \
asm volatile ("ppacb " #rd ", " #rs ", " #rt ) __asm__ volatile ("ppacb " #rd ", " #rs ", " #rt )
#define prevh(rt, rd) \ #define prevh(rt, rd) \
asm volatile ("prevh " #rd ", " #rt ) __asm__ volatile ("prevh " #rd ", " #rt )
#define pmulth(rs, rt, rd) \ #define pmulth(rs, rt, rd) \
asm volatile ("pmulth " #rd ", " #rs ", " #rt ) __asm__ volatile ("pmulth " #rd ", " #rs ", " #rt )
#define pmaxh(rs, rt, rd) \ #define pmaxh(rs, rt, rd) \
asm volatile ("pmaxh " #rd ", " #rs ", " #rt ) __asm__ volatile ("pmaxh " #rd ", " #rs ", " #rt )
#define pminh(rs, rt, rd) \ #define pminh(rs, rt, rd) \
asm volatile ("pminh " #rd ", " #rs ", " #rt ) __asm__ volatile ("pminh " #rd ", " #rs ", " #rt )
#define pinteh(rs, rt, rd) \ #define pinteh(rs, rt, rd) \
asm volatile ("pinteh " #rd ", " #rs ", " #rt ) __asm__ volatile ("pinteh " #rd ", " #rs ", " #rt )
#define paddh(rs, rt, rd) \ #define paddh(rs, rt, rd) \
asm volatile ("paddh " #rd ", " #rs ", " #rt ) __asm__ volatile ("paddh " #rd ", " #rs ", " #rt )
#define psubh(rs, rt, rd) \ #define psubh(rs, rt, rd) \
asm volatile ("psubh " #rd ", " #rs ", " #rt ) __asm__ volatile ("psubh " #rd ", " #rs ", " #rt )
#define psrah(rt, sa, rd) \ #define psrah(rt, sa, rd) \
asm volatile ("psrah " #rd ", " #rt ", %0" : : "i"(sa) ) __asm__ volatile ("psrah " #rd ", " #rt ", %0" : : "i"(sa) )
#define pmfhl_uw(rd) \ #define pmfhl_uw(rd) \
asm volatile ("pmfhl.uw " #rd) __asm__ volatile ("pmfhl.uw " #rd)
#define pextlb(rs, rt, rd) \ #define pextlb(rs, rt, rd) \
asm volatile ("pextlb " #rd ", " #rs ", " #rt ) __asm__ volatile ("pextlb " #rd ", " #rs ", " #rt )
#endif /* AVCODEC_PS2_MMI_H */ #endif /* AVCODEC_PS2_MMI_H */
...@@ -50,7 +50,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s, ...@@ -50,7 +50,7 @@ static void dct_unquantize_h263_mmi(MpegEncContext *s,
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
} }
asm volatile( __asm__ volatile(
"add $14, $0, %3 \n\t" "add $14, $0, %3 \n\t"
"pcpyld $8, %0, %0 \n\t" "pcpyld $8, %0, %0 \n\t"
"pcpyh $8, $8 \n\t" //r8 = qmul "pcpyh $8, $8 \n\t" //r8 = qmul
......
...@@ -28,7 +28,7 @@ static void memzero_align8(void *dst,size_t size) ...@@ -28,7 +28,7 @@ static void memzero_align8(void *dst,size_t size)
#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) #if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
(char*)dst+=size; (char*)dst+=size;
size/=8*4; size/=8*4;
asm( __asm__(
#if defined(__SH4__) #if defined(__SH4__)
" fschg\n" //single float mode " fschg\n" //single float mode
#endif #endif
......
...@@ -54,7 +54,7 @@ static const float odd_table[] __attribute__ ((aligned(8))) = { ...@@ -54,7 +54,7 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
#define load_matrix(table) \ #define load_matrix(table) \
asm volatile( \ __asm__ volatile( \
" fschg\n" \ " fschg\n" \
" fmov @%0+,xd0\n" \ " fmov @%0+,xd0\n" \
" fmov @%0+,xd2\n" \ " fmov @%0+,xd2\n" \
...@@ -71,15 +71,15 @@ static const float odd_table[] __attribute__ ((aligned(8))) = { ...@@ -71,15 +71,15 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
) )
#define ftrv() \ #define ftrv() \
asm volatile("ftrv xmtrx,fv0" \ __asm__ volatile("ftrv xmtrx,fv0" \
: "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \ : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
: "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) ); : "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
#define DEFREG \ #define DEFREG \
register float fr0 asm("fr0"); \ register float fr0 __asm__("fr0"); \
register float fr1 asm("fr1"); \ register float fr1 __asm__("fr1"); \
register float fr2 asm("fr2"); \ register float fr2 __asm__("fr2"); \
register float fr3 asm("fr3") register float fr3 __asm__("fr3")
#else #else
......
...@@ -59,11 +59,11 @@ ...@@ -59,11 +59,11 @@
/* signed 16x16 -> 32 multiply add accumulate */ /* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \ #define MAC16(rt, ra, rb) \
asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb)); __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
/* signed 16x16 -> 32 multiply */ /* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \ #define MUL16(rt, ra, rb) \
asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb)); __asm__ ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
#else #else
......
...@@ -388,7 +388,7 @@ inline void ff_simple_idct_vis(DCTELEM *data) { ...@@ -388,7 +388,7 @@ inline void ff_simple_idct_vis(DCTELEM *data) {
int out1, out2, out3, out4; int out1, out2, out3, out4;
DECLARE_ALIGNED_8(int16_t, temp[8*8]); DECLARE_ALIGNED_8(int16_t, temp[8*8]);
asm volatile( __asm__ volatile(
INIT_IDCT INIT_IDCT
#define ADDROUNDER #define ADDROUNDER
...@@ -428,7 +428,7 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) { ...@@ -428,7 +428,7 @@ void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data) {
int out1, out2, out3, out4, out5; int out1, out2, out3, out4, out5;
int r1, r2, r3, r4, r5, r6, r7; int r1, r2, r3, r4, r5, r6, r7;
asm volatile( __asm__ volatile(
"wr %%g0, 0x8, %%gsr \n\t" "wr %%g0, 0x8, %%gsr \n\t"
INIT_IDCT INIT_IDCT
...@@ -478,7 +478,7 @@ void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) { ...@@ -478,7 +478,7 @@ void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data) {
int out1, out2, out3, out4, out5, out6; int out1, out2, out3, out4, out5, out6;
int r1, r2, r3, r4, r5, r6, r7; int r1, r2, r3, r4, r5, r6, r7;
asm volatile( __asm__ volatile(
"wr %%g0, 0x8, %%gsr \n\t" "wr %%g0, 0x8, %%gsr \n\t"
INIT_IDCT INIT_IDCT
......
...@@ -55,97 +55,97 @@ ...@@ -55,97 +55,97 @@
#define vis_rd_d(X) (vis_dreg(X) << 25) #define vis_rd_d(X) (vis_dreg(X) << 25)
#define vis_ss2s(opf,rs1,rs2,rd) \ #define vis_ss2s(opf,rs1,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \ vis_rs1_s(rs1) | \
vis_rs2_s(rs2) | \ vis_rs2_s(rs2) | \
vis_rd_s(rd))) vis_rd_s(rd)))
#define vis_dd2d(opf,rs1,rs2,rd) \ #define vis_dd2d(opf,rs1,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_d(rs1) | \ vis_rs1_d(rs1) | \
vis_rs2_d(rs2) | \ vis_rs2_d(rs2) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_ss2d(opf,rs1,rs2,rd) \ #define vis_ss2d(opf,rs1,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \ vis_rs1_s(rs1) | \
vis_rs2_s(rs2) | \ vis_rs2_s(rs2) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_sd2d(opf,rs1,rs2,rd) \ #define vis_sd2d(opf,rs1,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \ vis_rs1_s(rs1) | \
vis_rs2_d(rs2) | \ vis_rs2_d(rs2) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_d2s(opf,rs2,rd) \ #define vis_d2s(opf,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_d(rs2) | \ vis_rs2_d(rs2) | \
vis_rd_s(rd))) vis_rd_s(rd)))
#define vis_s2d(opf,rs2,rd) \ #define vis_s2d(opf,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_s(rs2) | \ vis_rs2_s(rs2) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_d12d(opf,rs1,rd) \ #define vis_d12d(opf,rs1,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_d(rs1) | \ vis_rs1_d(rs1) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_d22d(opf,rs2,rd) \ #define vis_d22d(opf,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_d(rs2) | \ vis_rs2_d(rs2) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_s12s(opf,rs1,rd) \ #define vis_s12s(opf,rs1,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs1_s(rs1) | \ vis_rs1_s(rs1) | \
vis_rd_s(rd))) vis_rd_s(rd)))
#define vis_s22s(opf,rs2,rd) \ #define vis_s22s(opf,rs2,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rs2_s(rs2) | \ vis_rs2_s(rs2) | \
vis_rd_s(rd))) vis_rd_s(rd)))
#define vis_s(opf,rd) \ #define vis_s(opf,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rd_s(rd))) vis_rd_s(rd)))
#define vis_d(opf,rd) \ #define vis_d(opf,rd) \
asm volatile (".word %0" \ __asm__ volatile (".word %0" \
: : "i" (vis_opc_base | vis_opf(opf) | \ : : "i" (vis_opc_base | vis_opf(opf) | \
vis_rd_d(rd))) vis_rd_d(rd)))
#define vis_r2m(op,rd,mem) \ #define vis_r2m(op,rd,mem) \
asm volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) __asm__ volatile (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
#define vis_r2m_2(op,rd,mem1,mem2) \ #define vis_r2m_2(op,rd,mem1,mem2) \
asm volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) __asm__ volatile (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
#define vis_m2r(op,mem,rd) \ #define vis_m2r(op,mem,rd) \
asm volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) __asm__ volatile (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
#define vis_m2r_2(op,mem1,mem2,rd) \ #define vis_m2r_2(op,mem1,mem2,rd) \
asm volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) __asm__ volatile (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
static inline void vis_set_gsr(unsigned int _val) static inline void vis_set_gsr(unsigned int _val)
{ {
register unsigned int val asm("g1"); register unsigned int val __asm__("g1");
val = _val; val = _val;
asm volatile(".word 0xa7804000" __asm__ volatile(".word 0xa7804000"
: : "r" (val)); : : "r" (val));
} }
...@@ -164,9 +164,9 @@ static inline void vis_set_gsr(unsigned int _val) ...@@ -164,9 +164,9 @@ static inline void vis_set_gsr(unsigned int _val)
#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) #define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2)
#define vis_ldblk(mem, rd) \ #define vis_ldblk(mem, rd) \
do { register void *__mem asm("g1"); \ do { register void *__mem __asm__("g1"); \
__mem = &(mem); \ __mem = &(mem); \
asm volatile(".word 0xc1985e00 | %1" \ __asm__ volatile(".word 0xc1985e00 | %1" \
: \ : \
: "r" (__mem), \ : "r" (__mem), \
"i" (vis_rd_d(rd)) \ "i" (vis_rd_d(rd)) \
...@@ -174,9 +174,9 @@ do { register void *__mem asm("g1"); \ ...@@ -174,9 +174,9 @@ do { register void *__mem asm("g1"); \
} while (0) } while (0)
#define vis_stblk(rd, mem) \ #define vis_stblk(rd, mem) \
do { register void *__mem asm("g1"); \ do { register void *__mem __asm__("g1"); \
__mem = &(mem); \ __mem = &(mem); \
asm volatile(".word 0xc1b85e00 | %1" \ __asm__ volatile(".word 0xc1b85e00 | %1" \
: \ : \
: "r" (__mem), \ : "r" (__mem), \
"i" (vis_rd_d(rd)) \ "i" (vis_rd_d(rd)) \
...@@ -184,10 +184,10 @@ do { register void *__mem asm("g1"); \ ...@@ -184,10 +184,10 @@ do { register void *__mem asm("g1"); \
} while (0) } while (0)
#define vis_membar_storestore() \ #define vis_membar_storestore() \
asm volatile(".word 0x8143e008" : : : "memory") __asm__ volatile(".word 0x8143e008" : : : "memory")
#define vis_membar_sync() \ #define vis_membar_sync() \
asm volatile(".word 0x8143e040" : : : "memory") __asm__ volatile(".word 0x8143e040" : : : "memory")
/* 16 and 32 bit partitioned addition and subtraction. The normal /* 16 and 32 bit partitioned addition and subtraction. The normal
* versions perform 4 16-bit or 2 32-bit additions or subtractions. * versions perform 4 16-bit or 2 32-bit additions or subtractions.
...@@ -226,11 +226,11 @@ do { register void *__mem asm("g1"); \ ...@@ -226,11 +226,11 @@ do { register void *__mem asm("g1"); \
static inline void *vis_alignaddr(void *_ptr) static inline void *vis_alignaddr(void *_ptr)
{ {
register void *ptr asm("g1"); register void *ptr __asm__("g1");
ptr = _ptr; ptr = _ptr;
asm volatile(".word %2" __asm__ volatile(".word %2"
: "=&r" (ptr) : "=&r" (ptr)
: "0" (ptr), : "0" (ptr),
"i" (vis_opc_base | vis_opf(0x18) | "i" (vis_opc_base | vis_opf(0x18) |
...@@ -243,11 +243,11 @@ static inline void *vis_alignaddr(void *_ptr) ...@@ -243,11 +243,11 @@ static inline void *vis_alignaddr(void *_ptr)
static inline void vis_alignaddr_g0(void *_ptr) static inline void vis_alignaddr_g0(void *_ptr)
{ {
register void *ptr asm("g1"); register void *ptr __asm__("g1");
ptr = _ptr; ptr = _ptr;
asm volatile(".word %2" __asm__ volatile(".word %2"
: "=&r" (ptr) : "=&r" (ptr)
: "0" (ptr), : "0" (ptr),
"i" (vis_opc_base | vis_opf(0x18) | "i" (vis_opc_base | vis_opf(0x18) |
...@@ -258,11 +258,11 @@ static inline void vis_alignaddr_g0(void *_ptr) ...@@ -258,11 +258,11 @@ static inline void vis_alignaddr_g0(void *_ptr)
static inline void *vis_alignaddrl(void *_ptr) static inline void *vis_alignaddrl(void *_ptr)
{ {
register void *ptr asm("g1"); register void *ptr __asm__("g1");
ptr = _ptr; ptr = _ptr;
asm volatile(".word %2" __asm__ volatile(".word %2"
: "=&r" (ptr) : "=&r" (ptr)
: "0" (ptr), : "0" (ptr),
"i" (vis_opc_base | vis_opf(0x19) | "i" (vis_opc_base | vis_opf(0x19) |
...@@ -275,11 +275,11 @@ static inline void *vis_alignaddrl(void *_ptr) ...@@ -275,11 +275,11 @@ static inline void *vis_alignaddrl(void *_ptr)
static inline void vis_alignaddrl_g0(void *_ptr) static inline void vis_alignaddrl_g0(void *_ptr)
{ {
register void *ptr asm("g1"); register void *ptr __asm__("g1");
ptr = _ptr; ptr = _ptr;
asm volatile(".word %2" __asm__ volatile(".word %2"
: "=&r" (ptr) : "=&r" (ptr)
: "0" (ptr), : "0" (ptr),
"i" (vis_opc_base | vis_opf(0x19) | "i" (vis_opc_base | vis_opf(0x19) |
......
...@@ -33,11 +33,11 @@ ...@@ -33,11 +33,11 @@
static av_always_inline av_const uint16_t bswap_16(uint16_t x) static av_always_inline av_const uint16_t bswap_16(uint16_t x)
{ {
#if defined(ARCH_X86) #if defined(ARCH_X86)
asm("rorw $8, %0" : "+r"(x)); __asm__("rorw $8, %0" : "+r"(x));
#elif defined(ARCH_SH4) #elif defined(ARCH_SH4)
asm("swap.b %0,%0" : "=r"(x) : "0"(x)); __asm__("swap.b %0,%0" : "=r"(x) : "0"(x));
#elif defined(HAVE_ARMV6) #elif defined(HAVE_ARMV6)
asm("rev16 %0, %0" : "+r"(x)); __asm__("rev16 %0, %0" : "+r"(x));
#else #else
x= (x>>8) | (x<<8); x= (x>>8) | (x<<8);
#endif #endif
...@@ -48,30 +48,30 @@ static av_always_inline av_const uint32_t bswap_32(uint32_t x) ...@@ -48,30 +48,30 @@ static av_always_inline av_const uint32_t bswap_32(uint32_t x)
{ {
#if defined(ARCH_X86) #if defined(ARCH_X86)
#ifdef HAVE_BSWAP #ifdef HAVE_BSWAP
asm("bswap %0" : "+r" (x)); __asm__("bswap %0" : "+r" (x));
#else #else
asm("rorw $8, %w0 \n\t" __asm__("rorw $8, %w0 \n\t"
"rorl $16, %0 \n\t" "rorl $16, %0 \n\t"
"rorw $8, %w0" "rorw $8, %w0"
: "+r"(x)); : "+r"(x));
#endif #endif
#elif defined(ARCH_SH4) #elif defined(ARCH_SH4)
asm("swap.b %0,%0\n" __asm__("swap.b %0,%0\n"
"swap.w %0,%0\n" "swap.w %0,%0\n"
"swap.b %0,%0\n" "swap.b %0,%0\n"
: "=r"(x) : "0"(x)); : "=r"(x) : "0"(x));
#elif defined(HAVE_ARMV6) #elif defined(HAVE_ARMV6)
asm("rev %0, %0" : "+r"(x)); __asm__("rev %0, %0" : "+r"(x));
#elif defined(ARCH_ARMV4L) #elif defined(ARCH_ARMV4L)
uint32_t t; uint32_t t;
asm ("eor %1, %0, %0, ror #16 \n\t" __asm__ ("eor %1, %0, %0, ror #16 \n\t"
"bic %1, %1, #0xFF0000 \n\t" "bic %1, %1, #0xFF0000 \n\t"
"mov %0, %0, ror #8 \n\t" "mov %0, %0, ror #8 \n\t"
"eor %0, %0, %1, lsr #8 \n\t" "eor %0, %0, %1, lsr #8 \n\t"
: "+r"(x), "+r"(t)); : "+r"(x), "+r"(t));
#elif defined(ARCH_BFIN) #elif defined(ARCH_BFIN)
unsigned tmp; unsigned tmp;
asm("%1 = %0 >> 8 (V); \n\t" __asm__("%1 = %0 >> 8 (V); \n\t"
"%0 = %0 << 8 (V); \n\t" "%0 = %0 << 8 (V); \n\t"
"%0 = %0 | %1; \n\t" "%0 = %0 | %1; \n\t"
"%0 = PACK(%0.L, %0.H); \n\t" "%0 = PACK(%0.L, %0.H); \n\t"
...@@ -90,7 +90,7 @@ static inline uint64_t av_const bswap_64(uint64_t x) ...@@ -90,7 +90,7 @@ static inline uint64_t av_const bswap_64(uint64_t x)
x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL); x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
return (x>>32) | (x<<32); return (x>>32) | (x<<32);
#elif defined(ARCH_X86_64) #elif defined(ARCH_X86_64)
asm("bswap %0": "=r" (x) : "0" (x)); __asm__("bswap %0": "=r" (x) : "0" (x));
return x; return x;
#else #else
union { union {
......
...@@ -154,7 +154,7 @@ static inline av_const int mid_pred(int a, int b, int c) ...@@ -154,7 +154,7 @@ static inline av_const int mid_pred(int a, int b, int c)
{ {
#ifdef HAVE_CMOV #ifdef HAVE_CMOV
int i=b; int i=b;
asm volatile( __asm__ volatile(
"cmp %2, %1 \n\t" "cmp %2, %1 \n\t"
"cmovg %1, %0 \n\t" "cmovg %1, %0 \n\t"
"cmovg %2, %1 \n\t" "cmovg %2, %1 \n\t"
...@@ -327,7 +327,7 @@ static inline av_pure int ff_get_fourcc(const char *s){ ...@@ -327,7 +327,7 @@ static inline av_pure int ff_get_fourcc(const char *s){
static inline uint64_t read_time(void) static inline uint64_t read_time(void)
{ {
uint64_t a, d; uint64_t a, d;
asm volatile("rdtsc\n\t" __asm__ volatile("rdtsc\n\t"
: "=a" (a), "=d" (d)); : "=a" (a), "=d" (d));
return (d << 32) | (a & 0xffffffff); return (d << 32) | (a & 0xffffffff);
} }
...@@ -335,7 +335,7 @@ static inline uint64_t read_time(void) ...@@ -335,7 +335,7 @@ static inline uint64_t read_time(void)
static inline long long read_time(void) static inline long long read_time(void)
{ {
long long l; long long l;
asm volatile("rdtsc\n\t" __asm__ volatile("rdtsc\n\t"
: "=A" (l)); : "=A" (l));
return l; return l;
} }
...@@ -349,7 +349,7 @@ static inline uint64_t read_time(void) ...@@ -349,7 +349,7 @@ static inline uint64_t read_time(void)
} p; } p;
unsigned long long c; unsigned long long c;
} t; } t;
asm volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi)); __asm__ volatile ("%0=cycles; %1=cycles2;" : "=d" (t.p.lo), "=d" (t.p.hi));
return t.c; return t.c;
} }
#else //FIXME check ppc64 #else //FIXME check ppc64
...@@ -358,7 +358,7 @@ static inline uint64_t read_time(void) ...@@ -358,7 +358,7 @@ static inline uint64_t read_time(void)
uint32_t tbu, tbl, temp; uint32_t tbu, tbl, temp;
/* from section 2.2.1 of the 32-bit PowerPC PEM */ /* from section 2.2.1 of the 32-bit PowerPC PEM */
asm volatile( __asm__ volatile(
"1:\n" "1:\n"
"mftbu %2\n" "mftbu %2\n"
"mftb %0\n" "mftb %0\n"
......
...@@ -130,7 +130,7 @@ extern const uint32_t ff_inverse[256]; ...@@ -130,7 +130,7 @@ extern const uint32_t ff_inverse[256];
# define FASTDIV(a,b) \ # define FASTDIV(a,b) \
({\ ({\
int ret,dmy;\ int ret,dmy;\
asm volatile(\ __asm__ volatile(\
"mull %3"\ "mull %3"\
:"=d"(ret),"=a"(dmy)\ :"=d"(ret),"=a"(dmy)\
:"1"(a),"g"(ff_inverse[b])\ :"1"(a),"g"(ff_inverse[b])\
...@@ -141,7 +141,7 @@ extern const uint32_t ff_inverse[256]; ...@@ -141,7 +141,7 @@ extern const uint32_t ff_inverse[256];
static inline av_const int FASTDIV(int a, int b) static inline av_const int FASTDIV(int a, int b)
{ {
int r; int r;
asm volatile("cmp %2, #0 \n\t" __asm__ volatile("cmp %2, #0 \n\t"
"smmul %0, %1, %2 \n\t" "smmul %0, %1, %2 \n\t"
"rsblt %0, %0, #0 \n\t" "rsblt %0, %0, #0 \n\t"
: "=r"(r) : "r"(a), "r"(ff_inverse[b])); : "=r"(r) : "r"(a), "r"(ff_inverse[b]));
...@@ -151,7 +151,7 @@ static inline av_const int FASTDIV(int a, int b) ...@@ -151,7 +151,7 @@ static inline av_const int FASTDIV(int a, int b)
# define FASTDIV(a,b) \ # define FASTDIV(a,b) \
({\ ({\
int ret,dmy;\ int ret,dmy;\
asm volatile(\ __asm__ volatile(\
"umull %1, %0, %2, %3"\ "umull %1, %0, %2, %3"\
:"=&r"(ret),"=&r"(dmy)\ :"=&r"(ret),"=&r"(dmy)\
:"r"(a),"r"(ff_inverse[b])\ :"r"(a),"r"(ff_inverse[b])\
...@@ -190,7 +190,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) ...@@ -190,7 +190,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
#if defined(ARCH_X86) #if defined(ARCH_X86)
#define MASK_ABS(mask, level)\ #define MASK_ABS(mask, level)\
asm volatile(\ __asm__ volatile(\
"cltd \n\t"\ "cltd \n\t"\
"xorl %1, %0 \n\t"\ "xorl %1, %0 \n\t"\
"subl %1, %0 \n\t"\ "subl %1, %0 \n\t"\
...@@ -204,7 +204,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) ...@@ -204,7 +204,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
#ifdef HAVE_CMOV #ifdef HAVE_CMOV
#define COPY3_IF_LT(x,y,a,b,c,d)\ #define COPY3_IF_LT(x,y,a,b,c,d)\
asm volatile (\ __asm__ volatile (\
"cmpl %0, %3 \n\t"\ "cmpl %0, %3 \n\t"\
"cmovl %3, %0 \n\t"\ "cmovl %3, %0 \n\t"\
"cmovl %4, %1 \n\t"\ "cmovl %4, %1 \n\t"\
......
...@@ -156,28 +156,28 @@ static const char *replaceTable[]= ...@@ -156,28 +156,28 @@ static const char *replaceTable[]=
#if defined(ARCH_X86) #if defined(ARCH_X86)
static inline void prefetchnta(void *p) static inline void prefetchnta(void *p)
{ {
asm volatile( "prefetchnta (%0)\n\t" __asm__ volatile( "prefetchnta (%0)\n\t"
: : "r" (p) : : "r" (p)
); );
} }
static inline void prefetcht0(void *p) static inline void prefetcht0(void *p)
{ {
asm volatile( "prefetcht0 (%0)\n\t" __asm__ volatile( "prefetcht0 (%0)\n\t"
: : "r" (p) : : "r" (p)
); );
} }
static inline void prefetcht1(void *p) static inline void prefetcht1(void *p)
{ {
asm volatile( "prefetcht1 (%0)\n\t" __asm__ volatile( "prefetcht1 (%0)\n\t"
: : "r" (p) : : "r" (p)
); );
} }
static inline void prefetcht2(void *p) static inline void prefetcht2(void *p)
{ {
asm volatile( "prefetcht2 (%0)\n\t" __asm__ volatile( "prefetcht2 (%0)\n\t"
: : "r" (p) : : "r" (p)
); );
} }
......