Commit 80dd0aff authored by mplayer developers, committed by Michael Niedermayer

libavfilter/libmpcodecs: sync existing filters with mplayer HEAD...

libavfilter/libmpcodecs: sync existing filters with mplayer HEAD (ebcacb8b3ca91ef90acb93785b62fd8c5e5dae41)

Authors from svn:
cehoyos (2):
      Support playback of JPEG 2000 digital cinema files.
      Add name for image format IMGFMT_440P.

ib (1):
      Get rid of VOCTRL_GUI_NOWINDOW.

Matt Oliver (4):
      Fix libmpcodecs inline asm on ICL.
      Use DECLARE_ALIGNED helper macros.
      Remove some superfluous commas from inline asm for better compatibility.
      Use numeric labels in inline asm for consistency and better compatibility.

reimar (10):
      Fix bpp calculation for XYZ format.
      Avoid duplicating the mouse autohide code.
      Add NV12/NV21 support to some helper functions.
      Add support for rotating the video via OpenGL.
      Add options to determine where borders will be added when adjusting for aspect.
      Apply forgotten move of apply_border_pos function.
      Extract window creation code to common file.
      Make VDPAU support work again with latest FFmpeg.
      img_format: document why mp_get_chroma_shift does not work for NV12/NV21
      Minor spelling/grammar fixes.

For detailed line by line authorship please see svn log of mplayer
svn://svn.mplayerhq.hu/mplayer/trunk

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent eb434f71
...@@ -52,6 +52,8 @@ const char *ff_vo_format_name(int format) ...@@ -52,6 +52,8 @@ const char *ff_vo_format_name(int format)
case IMGFMT_BGRA: return "BGRA"; case IMGFMT_BGRA: return "BGRA";
case IMGFMT_ARGB: return "ARGB"; case IMGFMT_ARGB: return "ARGB";
case IMGFMT_RGBA: return "RGBA"; case IMGFMT_RGBA: return "RGBA";
case IMGFMT_XYZ12LE: return "XYZ 36-bit LE";
case IMGFMT_XYZ12BE: return "XYZ 36-bit BE";
case IMGFMT_GBR24P: return "Planar GBR 24-bit"; case IMGFMT_GBR24P: return "Planar GBR 24-bit";
case IMGFMT_GBR12P: return "Planar GBR 36-bit"; case IMGFMT_GBR12P: return "Planar GBR 36-bit";
case IMGFMT_GBR14P: return "Planar GBR 42-bit"; case IMGFMT_GBR14P: return "Planar GBR 42-bit";
...@@ -102,6 +104,7 @@ const char *ff_vo_format_name(int format) ...@@ -102,6 +104,7 @@ const char *ff_vo_format_name(int format)
case IMGFMT_422P: return "Planar 422P"; case IMGFMT_422P: return "Planar 422P";
case IMGFMT_422A: return "Planar 422P with alpha"; case IMGFMT_422A: return "Planar 422P with alpha";
case IMGFMT_411P: return "Planar 411P"; case IMGFMT_411P: return "Planar 411P";
case IMGFMT_440P: return "Planar 440P";
case IMGFMT_NV12: return "Planar NV12"; case IMGFMT_NV12: return "Planar NV12";
case IMGFMT_NV21: return "Planar NV21"; case IMGFMT_NV21: return "Planar NV21";
case IMGFMT_HM12: return "Planar NV12 Macroblock"; case IMGFMT_HM12: return "Planar NV12 Macroblock";
...@@ -218,6 +221,14 @@ int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *componen ...@@ -218,6 +221,14 @@ int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *componen
xs = 31; xs = 31;
ys = 31; ys = 31;
break; break;
case IMGFMT_NV12:
case IMGFMT_NV21:
xs = 1;
ys = 1;
// TODO: allowing this though currently breaks
// things all over the place.
err = 1;
break;
default: default:
err = 1; err = 1;
break; break;
......
...@@ -51,6 +51,11 @@ ...@@ -51,6 +51,11 @@
#define IMGFMT_BGR24 (IMGFMT_BGR|24) #define IMGFMT_BGR24 (IMGFMT_BGR|24)
#define IMGFMT_BGR32 (IMGFMT_BGR|32) #define IMGFMT_BGR32 (IMGFMT_BGR|32)
#define IMGFMT_XYZ_MASK 0xFFFFFF00
#define IMGFMT_XYZ (('X'<<24)|('Y'<<16)|('Z'<<8))
#define IMGFMT_XYZ12LE (IMGFMT_XYZ|12)
#define IMGFMT_XYZ12BE (IMGFMT_XYZ|12|128)
#define IMGFMT_GBR24P (('G'<<24)|('B'<<16)|('R'<<8)|24) #define IMGFMT_GBR24P (('G'<<24)|('B'<<16)|('R'<<8)|24)
#define IMGFMT_GBR12PLE (('G'<<24)|('B'<<16)|('R'<<8)|36) #define IMGFMT_GBR12PLE (('G'<<24)|('B'<<16)|('R'<<8)|36)
#define IMGFMT_GBR12PBE (('G'<<24)|('B'<<16)|('R'<<8)|36|128) #define IMGFMT_GBR12PBE (('G'<<24)|('B'<<16)|('R'<<8)|36|128)
...@@ -76,6 +81,7 @@ ...@@ -76,6 +81,7 @@
#define IMGFMT_BGR15LE (IMGFMT_BGR15|128) #define IMGFMT_BGR15LE (IMGFMT_BGR15|128)
#define IMGFMT_BGR16BE IMGFMT_BGR16 #define IMGFMT_BGR16BE IMGFMT_BGR16
#define IMGFMT_BGR16LE (IMGFMT_BGR16|128) #define IMGFMT_BGR16LE (IMGFMT_BGR16|128)
#define IMGFMT_XYZ12 IMGFMT_XYZ12BE
#define IMGFMT_GBR12P IMGFMT_GBR12PBE #define IMGFMT_GBR12P IMGFMT_GBR12PBE
#define IMGFMT_GBR14P IMGFMT_GBR14PBE #define IMGFMT_GBR14P IMGFMT_GBR14PBE
#else #else
...@@ -97,6 +103,7 @@ ...@@ -97,6 +103,7 @@
#define IMGFMT_BGR15LE IMGFMT_BGR15 #define IMGFMT_BGR15LE IMGFMT_BGR15
#define IMGFMT_BGR16BE (IMGFMT_BGR16|128) #define IMGFMT_BGR16BE (IMGFMT_BGR16|128)
#define IMGFMT_BGR16LE IMGFMT_BGR16 #define IMGFMT_BGR16LE IMGFMT_BGR16
#define IMGFMT_XYZ12 IMGFMT_XYZ12LE
#define IMGFMT_GBR12P IMGFMT_GBR12PLE #define IMGFMT_GBR12P IMGFMT_GBR12PLE
#define IMGFMT_GBR14P IMGFMT_GBR14PLE #define IMGFMT_GBR14P IMGFMT_GBR14PLE
#endif #endif
...@@ -107,9 +114,11 @@ ...@@ -107,9 +114,11 @@
#define IMGFMT_IS_RGB(fmt) (((fmt)&IMGFMT_RGB_MASK)==IMGFMT_RGB) #define IMGFMT_IS_RGB(fmt) (((fmt)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define IMGFMT_IS_BGR(fmt) (((fmt)&IMGFMT_BGR_MASK)==IMGFMT_BGR) #define IMGFMT_IS_BGR(fmt) (((fmt)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
#define IMGFMT_IS_XYZ(fmt) (((fmt)&IMGFMT_XYZ_MASK)==IMGFMT_XYZ)
#define IMGFMT_RGB_DEPTH(fmt) ((fmt)&0x7F) #define IMGFMT_RGB_DEPTH(fmt) ((fmt)&0x7F)
#define IMGFMT_BGR_DEPTH(fmt) ((fmt)&0x7F) #define IMGFMT_BGR_DEPTH(fmt) ((fmt)&0x7F)
#define IMGFMT_XYZ_DEPTH(fmt) ((fmt)&0x7F)
/* Planar YUV Formats */ /* Planar YUV Formats */
......
...@@ -30,11 +30,19 @@ ...@@ -30,11 +30,19 @@
#include "../img_format.h" #include "../img_format.h"
//#include "vidix/vidix.h" //#include "vidix/vidix.h"
/*
 * ROTATE(t, x, y): replace the pair (x, y) with (y, -x) in place.
 *
 *   t    - type used for the temporary that holds the old value of x
 *   x, y - modifiable lvalues; each is evaluated more than once, so they
 *          must not have side effects
 *
 * Wrapped in do { ... } while(0) so the macro acts as a single statement
 * (safe in an unbraced if/else). The temporary is named rot_tmp, so neither
 * argument may be an identifier with that name.
 */
#define ROTATE(t, x, y)         \
    do {                        \
        t rot_tmp = (x);        \
        (x) = (y);              \
        (y) = -rot_tmp;         \
    } while(0)
#define VO_EVENT_EXPOSE 1 #define VO_EVENT_EXPOSE 1
#define VO_EVENT_RESIZE 2 #define VO_EVENT_RESIZE 2
#define VO_EVENT_KEYPRESS 4 #define VO_EVENT_KEYPRESS 4
#define VO_EVENT_REINIT 8 #define VO_EVENT_REINIT 8
#define VO_EVENT_MOVE 16 #define VO_EVENT_MOVE 16
#define VO_EVENT_MOUSE 32
/* Obsolete: VOCTRL_QUERY_VAA 1 */ /* Obsolete: VOCTRL_QUERY_VAA 1 */
/* does the device support the required format */ /* does the device support the required format */
...@@ -43,7 +51,6 @@ ...@@ -43,7 +51,6 @@
#define VOCTRL_RESET 3 #define VOCTRL_RESET 3
/* true if vo driver can use GUI created windows */ /* true if vo driver can use GUI created windows */
#define VOCTRL_GUISUPPORT 4 #define VOCTRL_GUISUPPORT 4
#define VOCTRL_GUI_NOWINDOW 19
/* used to switch to fullscreen */ /* used to switch to fullscreen */
#define VOCTRL_FULLSCREEN 5 #define VOCTRL_FULLSCREEN 5
/* signal a device pause */ /* signal a device pause */
...@@ -63,7 +70,6 @@ ...@@ -63,7 +70,6 @@
/* equalizer controls */ /* equalizer controls */
#define VOCTRL_SET_EQUALIZER 17 #define VOCTRL_SET_EQUALIZER 17
#define VOCTRL_GET_EQUALIZER 18 #define VOCTRL_GET_EQUALIZER 18
//#define VOCTRL_GUI_NOWINDOW 19
/* Frame duplication */ /* Frame duplication */
#define VOCTRL_DUPLICATE_FRAME 20 #define VOCTRL_DUPLICATE_FRAME 20
// ... 21 // ... 21
...@@ -107,6 +113,7 @@ typedef struct { ...@@ -107,6 +113,7 @@ typedef struct {
#define VOFLAG_FLIPPING 0x08 #define VOFLAG_FLIPPING 0x08
#define VOFLAG_HIDDEN 0x10 //< Use to create a hidden window #define VOFLAG_HIDDEN 0x10 //< Use to create a hidden window
#define VOFLAG_STEREO 0x20 //< Use to create a stereo-capable window #define VOFLAG_STEREO 0x20 //< Use to create a stereo-capable window
#define VOFLAG_DEPTH 0x40 //< Request a depth buffer
#define VOFLAG_XOVERLAY_SUB_VO 0x10000 #define VOFLAG_XOVERLAY_SUB_VO 0x10000
typedef struct vo_info_s typedef struct vo_info_s
...@@ -223,6 +230,9 @@ extern int vo_directrendering; ...@@ -223,6 +230,9 @@ extern int vo_directrendering;
extern int vo_vsync; extern int vo_vsync;
extern int vo_fsmode; extern int vo_fsmode;
extern float vo_panscan; extern float vo_panscan;
extern float vo_border_pos_x;
extern float vo_border_pos_y;
extern int vo_rotate;
extern int vo_adapter_num; extern int vo_adapter_num;
extern int vo_refresh_rate; extern int vo_refresh_rate;
extern int vo_keepaspect; extern int vo_keepaspect;
...@@ -278,4 +288,13 @@ void calc_src_dst_rects(int src_width, int src_height, struct vo_rect *src, stru ...@@ -278,4 +288,13 @@ void calc_src_dst_rects(int src_width, int src_height, struct vo_rect *src, stru
struct vo_rect *borders, const struct vo_rect *crop); struct vo_rect *borders, const struct vo_rect *crop);
void vo_mouse_movement(int posx, int posy); void vo_mouse_movement(int posx, int posy);
/**
 * Convert a relative position into an integer offset for a span of size
 * `part` with respect to a range of size `full`.
 *
 * @param full size of the enclosing range
 * @param part size of the span being positioned
 * @param pos  relative position:
 *             - 0.0 .. 1.0 : linear interpolation between offset 0 and
 *                            offset (full - part)
 *             - below 0    : pos * part (a negative offset)
 *             - above 1    : (full - part) + (pos - 1) * part
 * @return the offset; the floating-point result is converted to int on
 *         return, i.e. truncated toward zero.
 */
static inline int apply_border_pos(int full, int part, float pos) {
    /* Negative positions scale with the span size alone. */
    if (pos < 0)
        return pos * part;

    /* Regular case: interpolate across the free space. */
    if (pos <= 1.0)
        return pos * (full - part);

    /* Past the far edge: free space plus the overshoot in span units. */
    return full - part + (pos - 1) * part;
}
#endif /* MPLAYER_VIDEO_OUT_H */ #endif /* MPLAYER_VIDEO_OUT_H */
...@@ -128,6 +128,10 @@ void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt){ ...@@ -128,6 +128,10 @@ void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt){
mpi->flags|=MP_IMGFLAG_SWAPPED; mpi->flags|=MP_IMGFLAG_SWAPPED;
return; return;
} }
if (IMGFMT_IS_XYZ(out_fmt)) {
mpi->bpp=3*((IMGFMT_XYZ_DEPTH(out_fmt) + 7) & ~7);
return;
}
mpi->num_planes=3; mpi->num_planes=3;
if (out_fmt == IMGFMT_GBR24P) { if (out_fmt == IMGFMT_GBR24P) {
mpi->bpp=24; mpi->bpp=24;
...@@ -243,7 +247,7 @@ mp_image_t* ff_new_mp_image(int w,int h){ ...@@ -243,7 +247,7 @@ mp_image_t* ff_new_mp_image(int w,int h){
void ff_free_mp_image(mp_image_t* mpi){ void ff_free_mp_image(mp_image_t* mpi){
if(!mpi) return; if(!mpi) return;
if(mpi->flags&MP_IMGFLAG_ALLOCATED){ if(mpi->flags&MP_IMGFLAG_ALLOCATED){
/* becouse we allocate the whole image in once */ /* because we allocate the whole image at once */
av_free(mpi->planes[0]); av_free(mpi->planes[0]);
if (mpi->flags & MP_IMGFLAG_RGB_PALETTE) if (mpi->flags & MP_IMGFLAG_RGB_PALETTE)
av_free(mpi->planes[1]); av_free(mpi->planes[1]);
......
...@@ -75,7 +75,7 @@ static const short custom_threshold[64]= ...@@ -75,7 +75,7 @@ static const short custom_threshold[64]=
20, 27, 26, 23, 20, 15, 11, 5 20, 27, 26, 23, 20, 15, 11, 5
}; };
static const uint8_t __attribute__((aligned(32))) dither[8][8]={ DECLARE_ASM_CONST(32, uint8_t, dither)[8][8]={
{ 0, 48, 12, 60, 3, 51, 15, 63, }, { 0, 48, 12, 60, 3, 51, 15, 63, },
{ 32, 16, 44, 28, 35, 19, 47, 31, }, { 32, 16, 44, 28, 35, 19, 47, 31, },
{ 8, 56, 4, 52, 11, 59, 7, 55, }, { 8, 56, 4, 52, 11, 59, 7, 55, },
...@@ -215,11 +215,11 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long sr ...@@ -215,11 +215,11 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long sr
"psraw %%mm5, %%mm3 \n\t" "psraw %%mm5, %%mm3 \n\t"
"psraw %%mm5, %%mm4 \n\t" "psraw %%mm5, %%mm4 \n\t"
"1: \n\t" "1: \n\t"
"movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t" "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t"
"movq (%%"REG_S"), %%mm0 \n\t" "movq (%%"REG_S"), %%mm0 \n\t"
"movq 8(%%"REG_S"), %%mm1 \n\t" "movq 8(%%"REG_S"), %%mm1 \n\t"
"movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t" "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t"
"paddw %%mm3, %%mm0 \n\t" "paddw %%mm3, %%mm0 \n\t"
"paddw %%mm4, %%mm1 \n\t" "paddw %%mm4, %%mm1 \n\t"
...@@ -286,15 +286,15 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s ...@@ -286,15 +286,15 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s
"movq 8(%%"REG_S"), %%mm1 \n\t" "movq 8(%%"REG_S"), %%mm1 \n\t"
"paddw %%mm3, %%mm0 \n\t" "paddw %%mm3, %%mm0 \n\t"
"paddw (%%"REG_S",%%"REG_a",), %%mm0 \n\t" "paddw (%%"REG_S",%%"REG_a"), %%mm0 \n\t"
"paddw %%mm4, %%mm1 \n\t" "paddw %%mm4, %%mm1 \n\t"
"movq 8(%%"REG_S",%%"REG_a",), %%mm6 \n\t" "movq 8(%%"REG_S",%%"REG_a"), %%mm6 \n\t"
"movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t" "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t"
"psraw %%mm2, %%mm0 \n\t" "psraw %%mm2, %%mm0 \n\t"
"paddw %%mm6, %%mm1 \n\t" "paddw %%mm6, %%mm1 \n\t"
"movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t" "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t"
"psraw %%mm2, %%mm1 \n\t" "psraw %%mm2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t" "packuswb %%mm1, %%mm0 \n\t"
...@@ -416,7 +416,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, ...@@ -416,7 +416,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src,
const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15)) const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15))
const int step=6-p->log2_count; const int step=6-p->log2_count;
const int qps= 3 + is_luma; const int qps= 3 + is_luma;
int32_t __attribute__((aligned(32))) block_align[4*8*BLOCKSZ+ 4*8*BLOCKSZ]; DECLARE_ALIGNED(32, int32_t, block_align)[4*8*BLOCKSZ+ 4*8*BLOCKSZ];
int16_t *block= (int16_t *)block_align; int16_t *block= (int16_t *)block_align;
int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ); int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ);
...@@ -873,7 +873,7 @@ static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int ...@@ -873,7 +873,7 @@ static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int
static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt) static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; DECLARE_ALIGNED(8, uint64_t, temps)[4];
__asm__ volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
...@@ -1598,6 +1598,10 @@ static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, ...@@ -1598,6 +1598,10 @@ static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output,
: "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps) : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
: "d"(thr_adr) : "d"(thr_adr)
NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433,
ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065)
NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065,
MM_FIX_1_082392200,ff_MM_FIX_0_541196100,MM_FIX_1_306562965)
: "%"REG_a : "%"REG_a
); );
} }
...@@ -1674,7 +1678,7 @@ static void row_idct_c(int16_t* workspace, ...@@ -1674,7 +1678,7 @@ static void row_idct_c(int16_t* workspace,
static void row_idct_mmx (int16_t* workspace, static void row_idct_mmx (int16_t* workspace,
int16_t* output_adr, int output_stride, int cnt) int16_t* output_adr, int output_stride, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; DECLARE_ALIGNED(8, uint64_t, temps)[4];
__asm__ volatile( __asm__ volatile(
"lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
"1: \n\t" "1: \n\t"
...@@ -1816,7 +1820,7 @@ static void row_idct_mmx (int16_t* workspace, ...@@ -1816,7 +1820,7 @@ static void row_idct_mmx (int16_t* workspace,
"paddw (%%"REG_D"), %%mm5 \n\t" "paddw (%%"REG_D"), %%mm5 \n\t"
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
"paddw (%%"REG_D",%%"REG_a",), %%mm1 \n\t" "paddw (%%"REG_D",%%"REG_a"), %%mm1 \n\t"
"paddw %%mm2, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
"paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t" "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t"
...@@ -1825,7 +1829,7 @@ static void row_idct_mmx (int16_t* workspace, ...@@ -1825,7 +1829,7 @@ static void row_idct_mmx (int16_t* workspace,
"movq %%mm5, (%%"REG_D") \n\t" "movq %%mm5, (%%"REG_D") \n\t"
"paddw %%mm2, %%mm6 \n\t" "paddw %%mm2, %%mm6 \n\t"
"movq %%mm1, (%%"REG_D",%%"REG_a",) \n\t" "movq %%mm1, (%%"REG_D",%%"REG_a") \n\t"
"psraw $3, %%mm0 \n\t" "psraw $3, %%mm0 \n\t"
"movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t" "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t"
...@@ -1837,7 +1841,7 @@ static void row_idct_mmx (int16_t* workspace, ...@@ -1837,7 +1841,7 @@ static void row_idct_mmx (int16_t* workspace,
"paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t" "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t"
"psubw %%mm4, %%mm5 \n\t" //d3 "psubw %%mm4, %%mm5 \n\t" //d3
"paddw (%%"REG_D",%%"REG_d",), %%mm3 \n\t" "paddw (%%"REG_D",%%"REG_d"), %%mm3 \n\t"
"psraw $3, %%mm6 \n\t" "psraw $3, %%mm6 \n\t"
"paddw 1*8+%3, %%mm4 \n\t" //d4 "paddw 1*8+%3, %%mm4 \n\t" //d4
...@@ -1852,13 +1856,13 @@ static void row_idct_mmx (int16_t* workspace, ...@@ -1852,13 +1856,13 @@ static void row_idct_mmx (int16_t* workspace,
"paddw (%%"REG_D"), %%mm5 \n\t" "paddw (%%"REG_D"), %%mm5 \n\t"
"psraw $3, %%mm4 \n\t" "psraw $3, %%mm4 \n\t"
"paddw (%%"REG_D",%%"REG_a",), %%mm4 \n\t" "paddw (%%"REG_D",%%"REG_a"), %%mm4 \n\t"
"add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows
"movq %%mm3, (%%"REG_D",%%"REG_d",) \n\t" "movq %%mm3, (%%"REG_D",%%"REG_d") \n\t"
"movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t" "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t"
"movq %%mm5, (%%"REG_D") \n\t" "movq %%mm5, (%%"REG_D") \n\t"
"movq %%mm4, (%%"REG_D",%%"REG_a",) \n\t" "movq %%mm4, (%%"REG_D",%%"REG_a") \n\t"
"sub %%"REG_d", %%"REG_D" \n\t" "sub %%"REG_d", %%"REG_D" \n\t"
"add $8, %%"REG_D" \n\t" "add $8, %%"REG_D" \n\t"
...@@ -1867,6 +1871,8 @@ static void row_idct_mmx (int16_t* workspace, ...@@ -1867,6 +1871,8 @@ static void row_idct_mmx (int16_t* workspace,
: "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps) : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
: "a"(output_stride*sizeof(short)) : "a"(output_stride*sizeof(short))
NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200,
MM_FIX_1_414213562,MM_DESCALE_RND)
: "%"REG_d : "%"REG_d
); );
} }
...@@ -1940,14 +1946,14 @@ static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int ...@@ -1940,14 +1946,14 @@ static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int
static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt) static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; DECLARE_ALIGNED(8, uint64_t, temps)[4];
__asm__ volatile( __asm__ volatile(
"lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
"6: \n\t" "6: \n\t"
"movd (%%"REG_S"), %%mm0 \n\t" "movd (%%"REG_S"), %%mm0 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movd (%%"REG_S",%%"REG_a",), %%mm1 \n\t" "movd (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t" "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t"
...@@ -1962,7 +1968,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, ...@@ -1962,7 +1968,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
"movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch! "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch!
"movq %%mm1, %%mm6 \n\t" "movq %%mm1, %%mm6 \n\t"
"movd (%%"REG_S",%%"REG_d",), %%mm4 \n\t" //6 "movd (%%"REG_S",%%"REG_d"), %%mm4 \n\t" //6
"punpcklbw %%mm7, %%mm3 \n\t" "punpcklbw %%mm7, %%mm3 \n\t"
"psubw %%mm3, %%mm5 \n\t" "psubw %%mm3, %%mm5 \n\t"
...@@ -1974,16 +1980,16 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, ...@@ -1974,16 +1980,16 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
"movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5 "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5
"paddw %%mm4, %%mm1 \n\t" "paddw %%mm4, %%mm1 \n\t"
"movq %%mm5, 0*8+%3 \n\t" //t7 "movq %%mm5, %3 \n\t" //t7
"punpcklbw %%mm7, %%mm3 \n\t" "punpcklbw %%mm7, %%mm3 \n\t"
"movq %%mm6, 1*8+%3 \n\t" //t6 "movq %%mm6, %4 \n\t" //t6
"movq %%mm2, %%mm4 \n\t" "movq %%mm2, %%mm4 \n\t"
"movd (%%"REG_S"), %%mm5 \n\t" //3 "movd (%%"REG_S"), %%mm5 \n\t" //3
"paddw %%mm3, %%mm2 \n\t" "paddw %%mm3, %%mm2 \n\t"
"movd (%%"REG_S",%%"REG_a",), %%mm6 \n\t" //4 "movd (%%"REG_S",%%"REG_a"), %%mm6 \n\t" //4
"punpcklbw %%mm7, %%mm5 \n\t" "punpcklbw %%mm7, %%mm5 \n\t"
"psubw %%mm3, %%mm4 \n\t" "psubw %%mm3, %%mm4 \n\t"
...@@ -2023,7 +2029,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, ...@@ -2023,7 +2029,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
"psubw %%mm1, %%mm5 \n\t" //d1 "psubw %%mm1, %%mm5 \n\t" //d1
"movq %%mm0, %%mm6 \n\t" "movq %%mm0, %%mm6 \n\t"
"movq 1*8+%3, %%mm1 \n\t" "movq %4, %%mm1 \n\t"
"punpcklwd %%mm5, %%mm0 \n\t" "punpcklwd %%mm5, %%mm0 \n\t"
"punpckhwd %%mm5, %%mm6 \n\t" "punpckhwd %%mm5, %%mm6 \n\t"
...@@ -2047,7 +2053,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, ...@@ -2047,7 +2053,7 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
"movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
"psllw $2, %%mm3 \n\t" //t10 "psllw $2, %%mm3 \n\t" //t10
"movq 0*8+%3, %%mm2 \n\t" "movq %3, %%mm2 \n\t"
"psllw $2, %%mm4 \n\t" //t11 "psllw $2, %%mm4 \n\t" //t11
"pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3 "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3
...@@ -2110,8 +2116,9 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, ...@@ -2110,8 +2116,9 @@ static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size,
"dec %%"REG_c" \n\t" "dec %%"REG_c" \n\t"
"jnz 6b \n\t" "jnz 6b \n\t"
: "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps) : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1])
: "a"(line_size) : "a"(line_size)
NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965)
: "%"REG_d); : "%"REG_d);
} }
......
...@@ -125,7 +125,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, ...@@ -125,7 +125,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
ASMALIGN(4) ASMALIGN(4)
".Lli0: \n\t" "2: \n\t"
"movq (%%"REG_S"), %%mm1 \n\t" "movq (%%"REG_S"), %%mm1 \n\t"
"movq (%%"REG_S"), %%mm2 \n\t" "movq (%%"REG_S"), %%mm2 \n\t"
...@@ -204,7 +204,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, ...@@ -204,7 +204,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
"add $32, %%"REG_D" \n\t" "add $32, %%"REG_D" \n\t"
"decl %%ecx \n\t" "decl %%ecx \n\t"
"jnz .Lli0 \n\t" "jnz 2b \n\t"
"emms \n\t" "emms \n\t"
"pop %%"REG_BP" \n\t" "pop %%"REG_BP" \n\t"
: :
...@@ -233,7 +233,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, ...@@ -233,7 +233,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
ASMALIGN(4) ASMALIGN(4)
".Lli1: \n\t" "3: \n\t"
"movq (%%"REG_S"), %%mm1 \n\t" "movq (%%"REG_S"), %%mm1 \n\t"
"movq (%%"REG_S"), %%mm2 \n\t" "movq (%%"REG_S"), %%mm2 \n\t"
...@@ -316,7 +316,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, ...@@ -316,7 +316,7 @@ static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
"add $32, %%"REG_D" \n\t" "add $32, %%"REG_D" \n\t"
"decl %%ecx \n\t" "decl %%ecx \n\t"
"jnz .Lli1 \n\t" "jnz 3b \n\t"
"emms \n\t" "emms \n\t"
"pop %%"REG_BP" \n\t" "pop %%"REG_BP" \n\t"
: :
......
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
#define XMAX(a,b) ((a) > (b) ? (a) : (b)) #define XMAX(a,b) ((a) > (b) ? (a) : (b))
//===========================================================================// //===========================================================================//
static const uint8_t __attribute__((aligned(8))) dither[8][8]={ DECLARE_ASM_CONST(8, uint8_t, dither)[8][8]={
{ 0, 48, 12, 60, 3, 51, 15, 63, }, { 0, 48, 12, 60, 3, 51, 15, 63, },
{ 32, 16, 44, 28, 35, 19, 47, 31, }, { 32, 16, 44, 28, 35, 19, 47, 31, },
{ 8, 56, 4, 52, 11, 59, 7, 55, }, { 8, 56, 4, 52, 11, 59, 7, 55, },
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
#define BLOCK 16 #define BLOCK 16
//===========================================================================// //===========================================================================//
static const uint8_t __attribute__((aligned(8))) dither[8][8]={ DECLARE_ASM_CONST(8, uint8_t, dither)[8][8]={
{ 0*4, 48*4, 12*4, 60*4, 3*4, 51*4, 15*4, 63*4, }, { 0*4, 48*4, 12*4, 60*4, 3*4, 51*4, 15*4, 63*4, },
{ 32*4, 16*4, 44*4, 28*4, 35*4, 19*4, 47*4, 31*4, }, { 32*4, 16*4, 44*4, 28*4, 35*4, 19*4, 47*4, 31*4, },
{ 8*4, 56*4, 4*4, 52*4, 11*4, 59*4, 7*4, 55*4, }, { 8*4, 56*4, 4*4, 52*4, 11*4, 59*4, 7*4, 55*4, },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment