Commit ef4a6514 authored by Mans Rullgard

Replace ASMALIGN() with .p2align

This macro has unconditionally used .p2align for a long time and
serves no useful purpose.
parent ac3c9d01
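
The substitution is purely mechanical: in the generated config.h the macro did nothing but stringify its argument into a GNU assembler .p2align directive, so each call site can spell out the directive instead. A minimal sketch of the equivalence (illustration only, not part of the patch; assumes a GCC-style compiler targeting x86 with the GNU assembler):

/* What configure used to write into config.h (removed below): */
#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"

/* .p2align N aligns the next instruction to a 2**N-byte boundary,
 * so ASMALIGN(3) requests 8-byte and ASMALIGN(4) 16-byte alignment.
 * After preprocessing, the following two statements are identical: */
void align_demo(void)
{
    __asm__ volatile (ASMALIGN(3)       "1: nop \n\t" ::);
    __asm__ volatile (".p2align 3 \n\t" "1: nop \n\t" ::);
}

Every hunk below makes exactly this textual substitution at the call sites.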
@@ -3245,7 +3245,6 @@ cat > $TMPH <<EOF
 #define CC_TYPE "$cc_type"
 #define CC_VERSION $cc_version
 #define restrict $_restrict
-#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\\n\\t"
 #define EXTERN_PREFIX "${extern_prefix}"
 #define EXTERN_ASM ${extern_prefix}
 #define SLIBSUF "$SLIBSUF"
...
@@ -81,7 +81,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEF
 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
-#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
+#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
 #define MOVQ_BFE(regd) \
@@ -368,7 +368,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 {
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movd (%1), %%mm0 \n\t"
         "movd (%1, %3), %%mm1 \n\t"
@@ -394,7 +394,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
 {
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%1, %3), %%mm1 \n\t"
@@ -420,7 +420,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
 {
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq 8(%1), %%mm4 \n\t"
...
@@ -838,7 +838,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
         "lea (%3, %3), %%"REG_a" \n\t"
         "movq (%1), %%mm0 \n\t"
         PAVGB" 1(%1), %%mm0 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm2 \n\t"
         "movq (%1, %3), %%mm1 \n\t"
...
@@ -37,7 +37,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
         "movd %4, %%mm5 \n\t"
         "punpcklwd %%mm5, %%mm5 \n\t"
         "punpcklwd %%mm5, %%mm5 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1, %0), %%mm0 \n\t"
         "movq 8(%1, %0), %%mm1 \n\t"
@@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
         "movd %3, %%mm5 \n\t"
         "punpcklwd %%mm5, %%mm5 \n\t"
         "punpcklwd %%mm5, %%mm5 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1, %0), %%mm0 \n\t"
         "movq 8(%1, %0), %%mm1 \n\t"
...
@@ -30,7 +30,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq 1(%1), %%mm1 \n\t"
@@ -71,7 +71,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
         "movq %%mm4, (%3) \n\t"
         "add %5, %3 \n\t"
         "decl %0 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%2), %%mm1 \n\t"
@@ -112,7 +112,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
     MOVQ_BFE(mm6);
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq 1(%1), %%mm1 \n\t"
@@ -170,7 +170,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
         "movq %%mm5, 8(%3) \n\t"
         "add %5, %3 \n\t"
         "decl %0 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%2), %%mm1 \n\t"
@@ -208,7 +208,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
         "movq (%1), %%mm0 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1, %3), %%mm1 \n\t"
         "movq (%1, %%"REG_a"),%%mm2 \n\t"
@@ -248,7 +248,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
         "paddusw %%mm1, %%mm5 \n\t"
         "xor %%"REG_a", %%"REG_a" \n\t"
         "add %3, %1 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
@@ -460,7 +460,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
     __asm__ volatile(
         "lea (%3, %3), %%"REG_a" \n\t"
         "movq (%1), %%mm0 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1, %3), %%mm1 \n\t"
         "movq (%1, %%"REG_a"), %%mm2 \n\t"
@@ -511,7 +511,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
         "paddusw %%mm1, %%mm5 \n\t"
         "xor %%"REG_a", %%"REG_a" \n\t"
         "add %3, %1 \n\t"
-        ASMALIGN(3)
+        ".p2align 3 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
...
@@ -35,7 +35,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
     __asm__ volatile(
         "mov $-128, %%"REG_a" \n\t"
         "pxor %%mm7, %%mm7 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0), %%mm0 \n\t"
         "movq (%0, %2), %%mm2 \n\t"
@@ -97,7 +97,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint
     __asm__ volatile(
         "pxor %%mm7, %%mm7 \n\t"
         "mov $-128, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0), %%mm0 \n\t"
         "movq (%1), %%mm2 \n\t"
...
@@ -356,7 +356,7 @@ inline void ff_idct_xvid_sse2(short *block)
     TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
     TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
     iLLM_HEAD
-    ASMALIGN(4)
+    ".p2align 4 \n\t"
     JNZ("%%ecx", "2f")
     JNZ("%%eax", "3f")
     JNZ("%%edx", "4f")
...
@@ -38,7 +38,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
     __asm__ volatile(
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq (%2, %%"REG_a"), %%mm2 \n\t"
@@ -73,7 +73,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     __asm__ volatile(
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%1, %3), %%mm1 \n\t"
@@ -95,7 +95,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
     int ret;
     __asm__ volatile(
         "pxor %%xmm2, %%xmm2 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movdqu (%1), %%xmm0 \n\t"
         "movdqu (%1, %4), %%xmm1 \n\t"
@@ -119,7 +119,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     __asm__ volatile(
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%1, %3), %%mm1 \n\t"
@@ -143,7 +143,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
     __asm__ volatile(
         "movq (%1), %%mm0 \n\t"
         "add %3, %1 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1), %%mm1 \n\t"
         "movq (%1, %3), %%mm2 \n\t"
@@ -170,7 +170,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         "movq (%1), %%mm0 \n\t"
         "pavgb 1(%1), %%mm0 \n\t"
         "add %3, %1 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1), %%mm1 \n\t"
         "movq (%1,%3), %%mm2 \n\t"
@@ -197,7 +197,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
 {
     x86_reg len= -(stride*h);
     __asm__ volatile(
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq (%2, %%"REG_a"), %%mm1 \n\t"
@@ -245,7 +245,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         "punpckhbw %%mm7, %%mm3 \n\t"
         "paddw %%mm2, %%mm0 \n\t"
         "paddw %%mm3, %%mm1 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%2, %%"REG_a"), %%mm2 \n\t"
         "movq 1(%2, %%"REG_a"), %%mm4 \n\t"
...
@@ -66,7 +66,7 @@ __asm__ volatile(
         "packssdw %%mm5, %%mm5 \n\t"
         "psubw %%mm5, %%mm7 \n\t"
         "pxor %%mm4, %%mm4 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %3), %%mm0 \n\t"
         "movq 8(%0, %3), %%mm1 \n\t"
@@ -129,7 +129,7 @@ __asm__ volatile(
         "packssdw %%mm5, %%mm5 \n\t"
         "psubw %%mm5, %%mm7 \n\t"
         "pxor %%mm4, %%mm4 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %3), %%mm0 \n\t"
         "movq 8(%0, %3), %%mm1 \n\t"
@@ -222,7 +222,7 @@ __asm__ volatile(
         "packssdw %%mm6, %%mm6 \n\t"
         "packssdw %%mm6, %%mm6 \n\t"
         "mov %3, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %%"REG_a"), %%mm0 \n\t"
         "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -285,7 +285,7 @@ __asm__ volatile(
         "packssdw %%mm6, %%mm6 \n\t"
         "packssdw %%mm6, %%mm6 \n\t"
         "mov %3, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %%"REG_a"), %%mm0 \n\t"
         "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -357,7 +357,7 @@ __asm__ volatile(
         "packssdw %%mm6, %%mm6 \n\t"
         "packssdw %%mm6, %%mm6 \n\t"
         "mov %3, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %%"REG_a"), %%mm0 \n\t"
         "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -418,7 +418,7 @@ __asm__ volatile(
         "packssdw %%mm6, %%mm6 \n\t"
         "packssdw %%mm6, %%mm6 \n\t"
         "mov %3, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movq (%0, %%"REG_a"), %%mm0 \n\t"
         "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
...
@@ -158,7 +158,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         "pxor "MM"6, "MM"6 \n\t"
         "psubw (%3), "MM"6 \n\t" // -bias[0]
         "mov $-128, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
         SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
@@ -190,7 +190,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         "pxor "MM"7, "MM"7 \n\t" // 0
         "pxor "MM"4, "MM"4 \n\t" // 0
         "mov $-128, %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
         SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
...
@@ -789,7 +789,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "4: \n\t"
         Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
         Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
@@ -864,7 +864,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "6: \n\t"
         Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
@@ -930,7 +930,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "2: \n\t"
         Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
@@ -1007,7 +1007,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "3: \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1071,7 +1071,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "5: \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1136,7 +1136,7 @@ IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4) \
+        "# .p2align 4 \n\t"\
         "1: \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
@@ -1210,7 +1210,7 @@ IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
         "jmp 9f \n\t"
-        "#" ASMALIGN(4)
+        "# .p2align 4 \n\t"
         "7: \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, shift) \
...
@@ -275,7 +275,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
         LOAD_ROUNDER_MMX("%5") \
         "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \
         "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \
-        ASMALIGN(3) \
+        ".p2align 3 \n\t" \
         "1: \n\t" \
         MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
         NORMALIZE_MMX("%6") \
@@ -331,7 +331,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
         LOAD_ROUNDER_MMX("%4") \
         "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
         "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
-        ASMALIGN(3) \
+        ".p2align 3 \n\t" \
         "1: \n\t" \
         MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \
         NORMALIZE_MMX("$7") \
@@ -369,7 +369,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
         LOAD_ROUNDER_MMX("%6") \
         "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
         "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
-        ASMALIGN(3) \
+        ".p2align 3 \n\t" \
         "1: \n\t" \
         MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
         NORMALIZE_MMX("$6") \
...
@@ -329,7 +329,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
         "movq %4, %%mm6 \n\t"
         "movq %5, %%mm7 \n\t"
         "jmp 2f \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1) \n\t"
         "movd (%1), %%mm0 \n\t"
@@ -484,7 +484,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
         "movq %4, %%mm6 \n\t"
         "movq %5, %%mm7 \n\t"
         "jmp 2f \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1) \n\t"
         "movd (%1), %%mm0 \n\t"
@@ -1239,7 +1239,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
         "pxor %4, %%mm7 \n\t"
         "movq %%mm7, %%mm6 \n\t"
         "pxor %5, %%mm7 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1, %0) \n\t"
         "movq (%1, %0), %%mm0 \n\t"
@@ -1300,7 +1300,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
         "movq "MANGLE(mask24r)", %%mm5 \n\t"
         "movq "MANGLE(mask24g)", %%mm6 \n\t"
         "movq "MANGLE(mask24b)", %%mm7 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1, %%"REG_a") \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
@@ -1369,7 +1369,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
     __asm__ volatile(
         "xor %%"REG_a", %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
         PREFETCH" 32(%2, %%"REG_a") \n\t"
@@ -1519,7 +1519,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
     __asm__ volatile(
         "xor %%"REG_a", %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
         PREFETCH" 32(%2, %%"REG_a") \n\t"
@@ -1648,7 +1648,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
         "xor %%"REG_a", %%"REG_a" \n\t"
         "pcmpeqw %%mm7, %%mm7 \n\t"
         "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
         "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -1701,7 +1701,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
     __asm__ volatile(
         "xor %%"REG_a", %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
         "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -1884,7 +1884,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
         "xor %%"REG_a", %%"REG_a" \n\t"
         "pcmpeqw %%mm7, %%mm7 \n\t"
         "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
         "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
@@ -1937,7 +1937,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
     __asm__ volatile(
         "xor %%"REG_a", %%"REG_a" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
         "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
@@ -2012,7 +2012,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
         "pxor %%mm7, %%mm7 \n\t"
         "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_d") \n\t"
         "movd (%0, %%"REG_d"), %%mm0 \n\t"
@@ -2086,7 +2086,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "pxor %%mm7, %%mm7 \n\t"
         "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
         "add %%"REG_d", %%"REG_d" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         PREFETCH" 64(%0, %%"REG_d") \n\t"
         PREFETCH" 64(%1, %%"REG_d") \n\t"
...
@@ -55,7 +55,7 @@
         "movq %%mm3, %%mm4 \n\t"\
         "lea " offset "(%0), %%"REG_d" \n\t"\
         "mov (%%"REG_d"), %%"REG_S" \n\t"\
-        ASMALIGN(4) /* FIXME Unroll? */\
+        ".p2align 4 \n\t" /* FIXME Unroll? */\
         "1: \n\t"\
         "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
         "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
@@ -93,7 +93,7 @@
         "pxor %%mm6, %%mm6 \n\t"\
         "pxor %%mm7, %%mm7 \n\t"\
         "mov (%%"REG_d"), %%"REG_S" \n\t"\
-        ASMALIGN(4) \
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
         "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
@@ -148,7 +148,7 @@
 #define YSCALEYUV2YV121 \
         "mov %2, %%"REG_a" \n\t"\
-        ASMALIGN(4) /* FIXME Unroll? */\
+        ".p2align 4 \n\t" /* FIXME Unroll? */\
         "1: \n\t"\
         "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
         "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
@@ -164,7 +164,7 @@
         "pcmpeqw %%mm7, %%mm7 \n\t"\
         "psrlw $15, %%mm7 \n\t"\
         "psllw $6, %%mm7 \n\t"\
-        ASMALIGN(4) /* FIXME Unroll? */\
+        ".p2align 4 \n\t" /* FIXME Unroll? */\
         "1: \n\t"\
         "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
         "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
@@ -187,14 +187,14 @@
 #define YSCALEYUV2PACKEDX_UV \
     __asm__ volatile(\
         "xor %%"REG_a", %%"REG_a" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "nop \n\t"\
         "1: \n\t"\
         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
         "mov (%%"REG_d"), %%"REG_S" \n\t"\
         "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
         "movq %%mm3, %%mm4 \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "2: \n\t"\
         "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
         "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
@@ -213,7 +213,7 @@
         "mov (%%"REG_d"), %%"REG_S" \n\t"\
         "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
         "movq "#dst1", "#dst2" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "2: \n\t"\
         "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
         "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
@@ -241,7 +241,7 @@
 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
     __asm__ volatile(\
         "xor %%"REG_a", %%"REG_a" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "nop \n\t"\
         "1: \n\t"\
         "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
@@ -250,7 +250,7 @@
         "pxor %%mm5, %%mm5 \n\t"\
         "pxor %%mm6, %%mm6 \n\t"\
         "pxor %%mm7, %%mm7 \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "2: \n\t"\
         "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
         "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
@@ -295,7 +295,7 @@
         "pxor %%mm5, %%mm5 \n\t"\
         "pxor %%mm7, %%mm7 \n\t"\
         "pxor %%mm6, %%mm6 \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "2: \n\t"\
         "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
         "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
@@ -381,7 +381,7 @@
         "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
         "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
         "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -413,7 +413,7 @@
 #define REAL_YSCALEYUV2RGB_UV(index, c) \
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
         "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -488,7 +488,7 @@
 #define REAL_YSCALEYUV2PACKED1(index, c) \
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
         "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
@@ -503,7 +503,7 @@
 #define REAL_YSCALEYUV2RGB1(index, c) \
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
         "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
@@ -552,7 +552,7 @@
 #define REAL_YSCALEYUV2PACKED1b(index, c) \
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
         "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -571,7 +571,7 @@
 // do vertical chrominance interpolation
 #define REAL_YSCALEYUV2RGB1b(index, c) \
         "xor "#index", "#index" \n\t"\
-        ASMALIGN(4)\
+        ".p2align 4 \n\t"\
         "1: \n\t"\
         "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
         "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
@@ -2055,7 +2055,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
         "pxor %%mm7, %%mm7 \n\t"
         "push %%"REG_BP" \n\t" // we use 7 regs here ...
         "mov %%"REG_a", %%"REG_BP" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movzwl (%2, %%"REG_BP"), %%eax \n\t"
         "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
@@ -2099,7 +2099,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
         "pxor %%mm7, %%mm7 \n\t"
         "push %%"REG_BP" \n\t" // we use 7 regs here ...
         "mov %%"REG_a", %%"REG_BP" \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movzwl (%2, %%"REG_BP"), %%eax \n\t"
         "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
@@ -2150,7 +2150,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
     dst-= counter/2;
     __asm__ volatile(
         "pxor %%mm7, %%mm7 \n\t"
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "mov %2, %%"REG_c" \n\t"
         "movzwl (%%"REG_c", %0), %%eax \n\t"
@@ -2335,7 +2335,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
         "xor %%"REG_a", %%"REG_a" \n\t" // i
         "xor %%"REG_d", %%"REG_d" \n\t" // xx
         "xorl %%ecx, %%ecx \n\t" // xalpha
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
         "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
@@ -2475,7 +2475,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
         "xor %%"REG_a", %%"REG_a" \n\t" // i
         "xor %%"REG_d", %%"REG_d" \n\t" // xx
         "xorl %%ecx, %%ecx \n\t" // xalpha
-        ASMALIGN(4)
+        ".p2align 4 \n\t"
         "1: \n\t"
         "mov %0, %%"REG_S" \n\t"
         "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
...