Commit ca19862d authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  libxvid: remove disabled code
  qdm2: make a table static const
  qdm2: simplify bitstream reader setup for some subpacket types
  qdm2: use get_bits_left()
  build: Consistently handle conditional compilation for all optimization OBJS.
  avpacket, bfi, bgmc, rawenc: K&R prettyprinting cosmetics
  msrle: convert MS RLE decoding function to bytestream2.
  x86inc improvements for 64-bit

Conflicts:
	common.mak
	libavcodec/avpacket.c
	libavcodec/bfi.c
	libavcodec/msrledec.c
	libavcodec/qdm2.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 5eeecde8 18e8fef7
...@@ -66,7 +66,8 @@ config.h: .config ...@@ -66,7 +66,8 @@ config.h: .config
SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \
ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \ ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \
ALTIVEC-OBJS ARMV6-OBJS MMX-OBJS NEON-OBJS YASM-OBJS \ ALTIVEC-OBJS ARMV5TE-OBJS ARMV6-OBJS ARMVFP-OBJS MMI-OBJS \
MMX-OBJS NEON-OBJS VIS-OBJS YASM-OBJS \
OBJS TESTOBJS OBJS TESTOBJS
define RESET define RESET
......
OBJS-$(HAVE_ARMV5TE) += $(ARMV5TE-OBJS) $(ARMV5TE-OBJS-yes)
OBJS-$(HAVE_ARMV6) += $(ARMV6-OBJS) $(ARMV6-OBJS-yes)
OBJS-$(HAVE_ARMVFP) += $(ARMVFP-OBJS) $(ARMVFP-OBJS-yes)
OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes)
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
OBJS-$(HAVE_VIS) += $(VIS-OBJS) $(VIS-OBJS-yes)
OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes)
OBJS-$(HAVE_YASM) += $(YASM-OBJS) $(YASM-OBJS-yes)
...@@ -73,7 +73,7 @@ COMPILE_S = $(call COMPILE,AS) ...@@ -73,7 +73,7 @@ COMPILE_S = $(call COMPILE,AS)
$(OBJS): $(OBJS):
endif endif
OBJS-$(HAVE_MMX) += $(MMX-OBJS-yes) include $(SRC_PATH)/arch.mak
OBJS += $(OBJS-yes) OBJS += $(OBJS-yes)
FFLIBS := $(FFLIBS-yes) $(FFLIBS) FFLIBS := $(FFLIBS-yes) $(FFLIBS)
......
...@@ -34,17 +34,10 @@ ...@@ -34,17 +34,10 @@
typedef struct AascContext { typedef struct AascContext {
AVCodecContext *avctx; AVCodecContext *avctx;
GetByteContext gb;
AVFrame frame; AVFrame frame;
} AascContext; } AascContext;
#define FETCH_NEXT_STREAM_BYTE() \
if (stream_ptr >= buf_size) \
{ \
av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (fetch)\n"); \
break; \
} \
stream_byte = buf[stream_ptr++];
static av_cold int aasc_decode_init(AVCodecContext *avctx) static av_cold int aasc_decode_init(AVCodecContext *avctx)
{ {
AascContext *s = avctx->priv_data; AascContext *s = avctx->priv_data;
...@@ -89,7 +82,8 @@ static int aasc_decode_frame(AVCodecContext *avctx, ...@@ -89,7 +82,8 @@ static int aasc_decode_frame(AVCodecContext *avctx,
} }
break; break;
case 1: case 1:
ff_msrle_decode(avctx, (AVPicture*)&s->frame, 8, buf - 4, buf_size + 4); bytestream2_init(&s->gb, buf - 4, buf_size + 4);
ff_msrle_decode(avctx, (AVPicture*)&s->frame, 8, &s->gb);
break; break;
default: default:
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr); av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
......
...@@ -28,21 +28,19 @@ OBJS += arm/dsputil_init_arm.o \ ...@@ -28,21 +28,19 @@ OBJS += arm/dsputil_init_arm.o \
arm/mpegvideo_arm.o \ arm/mpegvideo_arm.o \
arm/simple_idct_arm.o \ arm/simple_idct_arm.o \
OBJS-$(HAVE_ARMV5TE) += arm/dsputil_init_armv5te.o \ ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \
arm/mpegvideo_armv5te.o \ arm/mpegvideo_armv5te.o \
arm/mpegvideo_armv5te_s.o \ arm/mpegvideo_armv5te_s.o \
arm/simple_idct_armv5te.o \ arm/simple_idct_armv5te.o \
OBJS-$(HAVE_ARMV6) += arm/dsputil_init_armv6.o \ ARMV6-OBJS += arm/dsputil_init_armv6.o \
arm/dsputil_armv6.o \ arm/dsputil_armv6.o \
arm/simple_idct_armv6.o \ arm/simple_idct_armv6.o \
$(ARMV6-OBJS-yes)
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \ ARMVFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \ ARMVFP-OBJS += arm/dsputil_vfp.o \
arm/dsputil_init_vfp.o \ arm/dsputil_init_vfp.o \
$(VFP-OBJS-yes)
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o \ arm/fft_fixed_neon.o \
...@@ -84,10 +82,9 @@ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \ ...@@ -84,10 +82,9 @@ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \
NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_neon.o NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_neon.o
OBJS-$(HAVE_NEON) += arm/dsputil_init_neon.o \ NEON-OBJS += arm/dsputil_init_neon.o \
arm/dsputil_neon.o \ arm/dsputil_neon.o \
arm/fmtconvert_neon.o \ arm/fmtconvert_neon.o \
arm/int_neon.o \ arm/int_neon.o \
arm/mpegvideo_neon.o \ arm/mpegvideo_neon.o \
arm/simple_idct_neon.o \ arm/simple_idct_neon.o \
$(NEON-OBJS-yes)
...@@ -19,14 +19,15 @@ ...@@ -19,14 +19,15 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "avcodec.h"
#include "internal.h" #include "internal.h"
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
#include "bytestream.h" #include "bytestream.h"
#include "avcodec.h"
void av_destruct_packet_nofree(AVPacket *pkt) void av_destruct_packet_nofree(AVPacket *pkt)
{ {
pkt->data = NULL; pkt->size = 0; pkt->data = NULL;
pkt->size = 0;
pkt->side_data = NULL; pkt->side_data = NULL;
pkt->side_data_elems = 0; pkt->side_data_elems = 0;
} }
...@@ -43,47 +44,49 @@ void ff_packet_free_side_data(AVPacket *pkt) ...@@ -43,47 +44,49 @@ void ff_packet_free_side_data(AVPacket *pkt)
void av_destruct_packet(AVPacket *pkt) void av_destruct_packet(AVPacket *pkt)
{ {
av_free(pkt->data); av_free(pkt->data);
pkt->data = NULL; pkt->size = 0; pkt->data = NULL;
pkt->size = 0;
ff_packet_free_side_data(pkt); ff_packet_free_side_data(pkt);
} }
void av_init_packet(AVPacket *pkt) void av_init_packet(AVPacket *pkt)
{ {
pkt->pts = AV_NOPTS_VALUE; pkt->pts = AV_NOPTS_VALUE;
pkt->dts = AV_NOPTS_VALUE; pkt->dts = AV_NOPTS_VALUE;
pkt->pos = -1; pkt->pos = -1;
pkt->duration = 0; pkt->duration = 0;
pkt->convergence_duration = 0; pkt->convergence_duration = 0;
pkt->flags = 0; pkt->flags = 0;
pkt->stream_index = 0; pkt->stream_index = 0;
pkt->destruct= NULL; pkt->destruct = NULL;
pkt->side_data = NULL; pkt->side_data = NULL;
pkt->side_data_elems = 0; pkt->side_data_elems = 0;
} }
int av_new_packet(AVPacket *pkt, int size) int av_new_packet(AVPacket *pkt, int size)
{ {
uint8_t *data= NULL; uint8_t *data = NULL;
if((unsigned)size < (unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE) if ((unsigned)size < (unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE)
data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
if (data){ if (data) {
memset(data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE); memset(data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
}else } else
size=0; size = 0;
av_init_packet(pkt); av_init_packet(pkt);
pkt->data = data; pkt->data = data;
pkt->size = size; pkt->size = size;
pkt->destruct = av_destruct_packet; pkt->destruct = av_destruct_packet;
if(!data) if (!data)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
return 0; return 0;
} }
void av_shrink_packet(AVPacket *pkt, int size) void av_shrink_packet(AVPacket *pkt, int size)
{ {
if (pkt->size <= size) return; if (pkt->size <= size)
return;
pkt->size = size; pkt->size = size;
memset(pkt->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE); memset(pkt->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
} }
...@@ -94,40 +97,45 @@ int av_grow_packet(AVPacket *pkt, int grow_by) ...@@ -94,40 +97,45 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
av_assert0((unsigned)pkt->size <= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE); av_assert0((unsigned)pkt->size <= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
if (!pkt->size) if (!pkt->size)
return av_new_packet(pkt, grow_by); return av_new_packet(pkt, grow_by);
if ((unsigned)grow_by > INT_MAX - (pkt->size + FF_INPUT_BUFFER_PADDING_SIZE)) if ((unsigned)grow_by >
INT_MAX - (pkt->size + FF_INPUT_BUFFER_PADDING_SIZE))
return -1; return -1;
new_ptr = av_realloc(pkt->data, pkt->size + grow_by + FF_INPUT_BUFFER_PADDING_SIZE); new_ptr = av_realloc(pkt->data,
pkt->size + grow_by + FF_INPUT_BUFFER_PADDING_SIZE);
if (!new_ptr) if (!new_ptr)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
pkt->data = new_ptr; pkt->data = new_ptr;
pkt->size += grow_by; pkt->size += grow_by;
memset(pkt->data + pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE); memset(pkt->data + pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
return 0; return 0;
} }
#define DUP_DATA(dst, src, size, padding) \ #define DUP_DATA(dst, src, size, padding) \
do { \ do { \
void *data; \ void *data; \
if (padding) { \ if (padding) { \
if ((unsigned)(size) > (unsigned)(size) + FF_INPUT_BUFFER_PADDING_SIZE) \ if ((unsigned)(size) > \
goto failed_alloc; \ (unsigned)(size) + FF_INPUT_BUFFER_PADDING_SIZE) \
data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); \ goto failed_alloc; \
} else { \ data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE); \
data = av_malloc(size); \ } else { \
} \ data = av_malloc(size); \
if (!data) \ } \
goto failed_alloc; \ if (!data) \
memcpy(data, src, size); \ goto failed_alloc; \
if (padding) \ memcpy(data, src, size); \
memset((uint8_t*)data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE); \ if (padding) \
dst = data; \ memset((uint8_t *)data + size, 0, \
} while(0) FF_INPUT_BUFFER_PADDING_SIZE); \
dst = data; \
} while (0)
int av_dup_packet(AVPacket *pkt) int av_dup_packet(AVPacket *pkt)
{ {
AVPacket tmp_pkt; AVPacket tmp_pkt;
if (((pkt->destruct == av_destruct_packet_nofree) || (pkt->destruct == NULL)) && pkt->data) { if (((pkt->destruct == av_destruct_packet_nofree) ||
(pkt->destruct == NULL)) && pkt->data) {
tmp_pkt = *pkt; tmp_pkt = *pkt;
pkt->data = NULL; pkt->data = NULL;
...@@ -140,14 +148,15 @@ int av_dup_packet(AVPacket *pkt) ...@@ -140,14 +148,15 @@ int av_dup_packet(AVPacket *pkt)
DUP_DATA(pkt->side_data, tmp_pkt.side_data, DUP_DATA(pkt->side_data, tmp_pkt.side_data,
pkt->side_data_elems * sizeof(*pkt->side_data), 0); pkt->side_data_elems * sizeof(*pkt->side_data), 0);
memset(pkt->side_data, 0, pkt->side_data_elems * sizeof(*pkt->side_data)); memset(pkt->side_data, 0,
for (i = 0; i < pkt->side_data_elems; i++) { pkt->side_data_elems * sizeof(*pkt->side_data));
for (i = 0; i < pkt->side_data_elems; i++)
DUP_DATA(pkt->side_data[i].data, tmp_pkt.side_data[i].data, DUP_DATA(pkt->side_data[i].data, tmp_pkt.side_data[i].data,
pkt->side_data[i].size, 1); pkt->side_data[i].size, 1);
}
} }
} }
return 0; return 0;
failed_alloc: failed_alloc:
av_destruct_packet(pkt); av_destruct_packet(pkt);
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
...@@ -156,14 +165,16 @@ failed_alloc: ...@@ -156,14 +165,16 @@ failed_alloc:
void av_free_packet(AVPacket *pkt) void av_free_packet(AVPacket *pkt)
{ {
if (pkt) { if (pkt) {
if (pkt->destruct) pkt->destruct(pkt); if (pkt->destruct)
pkt->data = NULL; pkt->size = 0; pkt->destruct(pkt);
pkt->data = NULL;
pkt->size = 0;
pkt->side_data = NULL; pkt->side_data = NULL;
pkt->side_data_elems = 0; pkt->side_data_elems = 0;
} }
} }
uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
int size) int size)
{ {
int elems = pkt->side_data_elems; int elems = pkt->side_data_elems;
...@@ -173,7 +184,8 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, ...@@ -173,7 +184,8 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
if ((unsigned)size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE) if ((unsigned)size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
return NULL; return NULL;
pkt->side_data = av_realloc(pkt->side_data, (elems + 1) * sizeof(*pkt->side_data)); pkt->side_data = av_realloc(pkt->side_data,
(elems + 1) * sizeof(*pkt->side_data));
if (!pkt->side_data) if (!pkt->side_data)
return NULL; return NULL;
...@@ -187,7 +199,7 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type, ...@@ -187,7 +199,7 @@ uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
return pkt->side_data[elems].data; return pkt->side_data[elems].data;
} }
uint8_t* av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type, uint8_t *av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
int *size) int *size)
{ {
int i; int i;
......
...@@ -40,9 +40,9 @@ typedef struct BFIContext { ...@@ -40,9 +40,9 @@ typedef struct BFIContext {
static av_cold int bfi_decode_init(AVCodecContext *avctx) static av_cold int bfi_decode_init(AVCodecContext *avctx)
{ {
BFIContext *bfi = avctx->priv_data; BFIContext *bfi = avctx->priv_data;
avctx->pix_fmt = PIX_FMT_PAL8; avctx->pix_fmt = PIX_FMT_PAL8;
avcodec_get_frame_defaults(&bfi->frame); avcodec_get_frame_defaults(&bfi->frame);
bfi->dst = av_mallocz(avctx->width * avctx->height); bfi->dst = av_mallocz(avctx->width * avctx->height);
return 0; return 0;
} }
...@@ -50,9 +50,9 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -50,9 +50,9 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
int *data_size, AVPacket *avpkt) int *data_size, AVPacket *avpkt)
{ {
GetByteContext g; GetByteContext g;
int buf_size = avpkt->size; int buf_size = avpkt->size;
BFIContext *bfi = avctx->priv_data; BFIContext *bfi = avctx->priv_data;
uint8_t *dst = bfi->dst; uint8_t *dst = bfi->dst;
uint8_t *src, *dst_offset, colour1, colour2; uint8_t *src, *dst_offset, colour1, colour2;
uint8_t *frame_end = bfi->dst + avctx->width * avctx->height; uint8_t *frame_end = bfi->dst + avctx->width * avctx->height;
uint32_t *pal; uint32_t *pal;
...@@ -84,9 +84,8 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -84,9 +84,8 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
int shift = 16; int shift = 16;
*pal = 0xFF << 24; *pal = 0xFF << 24;
for (j = 0; j < 3; j++, shift -= 8) for (j = 0; j < 3; j++, shift -= 8)
*pal += *pal += ((avctx->extradata[i * 3 + j] << 2) |
((avctx->extradata[i * 3 + j] << 2) | (avctx->extradata[i * 3 + j] >> 4)) << shift;
(avctx->extradata[i * 3 + j] >> 4)) << shift;
pal++; pal++;
} }
memcpy(bfi->pal, bfi->frame.data[1], sizeof(bfi->pal)); memcpy(bfi->pal, bfi->frame.data[1], sizeof(bfi->pal));
...@@ -112,7 +111,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -112,7 +111,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
return -1; return -1;
} }
/* Get length and offset(if required) */ /* Get length and offset (if required) */
if (length == 0) { if (length == 0) {
if (code == 1) { if (code == 1) {
length = bytestream2_get_byte(&g); length = bytestream2_get_byte(&g);
...@@ -132,8 +131,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -132,8 +131,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
break; break;
switch (code) { switch (code) {
case 0: // normal chain
case 0: //Normal Chain
if (length >= bytestream2_get_bytes_left(&g)) { if (length >= bytestream2_get_bytes_left(&g)) {
av_log(avctx, AV_LOG_ERROR, "Frame larger than buffer.\n"); av_log(avctx, AV_LOG_ERROR, "Frame larger than buffer.\n");
return -1; return -1;
...@@ -141,21 +139,18 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -141,21 +139,18 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
bytestream2_get_buffer(&g, dst, length); bytestream2_get_buffer(&g, dst, length);
dst += length; dst += length;
break; break;
case 1: // back chain
case 1: //Back Chain
dst_offset = dst - offset; dst_offset = dst - offset;
length *= 4; //Convert dwords to bytes. length *= 4; // Convert dwords to bytes.
if (dst_offset < bfi->dst) if (dst_offset < bfi->dst)
break; break;
while (length--) while (length--)
*dst++ = *dst_offset++; *dst++ = *dst_offset++;
break; break;
case 2: // skip chain
case 2: //Skip Chain
dst += length; dst += length;
break; break;
case 3: // fill chain
case 3: //Fill Chain
colour1 = bytestream2_get_byte(&g); colour1 = bytestream2_get_byte(&g);
colour2 = bytestream2_get_byte(&g); colour2 = bytestream2_get_byte(&g);
while (length--) { while (length--) {
...@@ -163,7 +158,6 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -163,7 +158,6 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
*dst++ = colour2; *dst++ = colour2;
} }
break; break;
} }
} }
...@@ -174,12 +168,12 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data, ...@@ -174,12 +168,12 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
src += avctx->width; src += avctx->width;
dst += bfi->frame.linesize[0]; dst += bfi->frame.linesize[0];
} }
*data_size = sizeof(AVFrame); *data_size = sizeof(AVFrame);
*(AVFrame *)data = bfi->frame; *(AVFrame *)data = bfi->frame;
return buf_size; return buf_size;
} }
static av_cold int bfi_decode_close(AVCodecContext * avctx) static av_cold int bfi_decode_close(AVCodecContext *avctx)
{ {
BFIContext *bfi = avctx->priv_data; BFIContext *bfi = avctx->priv_data;
if (bfi->frame.data[0]) if (bfi->frame.data[0])
......
This diff is collapsed.
...@@ -53,6 +53,7 @@ static int bmp_decode_frame(AVCodecContext *avctx, ...@@ -53,6 +53,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
uint8_t *ptr; uint8_t *ptr;
int dsize; int dsize;
const uint8_t *buf0 = buf; const uint8_t *buf0 = buf;
GetByteContext gb;
if(buf_size < 14){ if(buf_size < 14){
av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size); av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size);
...@@ -269,7 +270,8 @@ static int bmp_decode_frame(AVCodecContext *avctx, ...@@ -269,7 +270,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
p->data[0] += p->linesize[0] * (avctx->height - 1); p->data[0] += p->linesize[0] * (avctx->height - 1);
p->linesize[0] = -p->linesize[0]; p->linesize[0] = -p->linesize[0];
} }
ff_msrle_decode(avctx, (AVPicture*)p, depth, buf, dsize); bytestream2_init(&gb, buf, dsize);
ff_msrle_decode(avctx, (AVPicture*)p, depth, &gb);
if(height < 0){ if(height < 0){
p->data[0] += p->linesize[0] * (avctx->height - 1); p->data[0] += p->linesize[0] * (avctx->height - 1);
p->linesize[0] = -p->linesize[0]; p->linesize[0] = -p->linesize[0];
......
...@@ -105,10 +105,6 @@ float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run){ ...@@ -105,10 +105,6 @@ float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run){
xvid_plg_data.bquant_offset = 0; // 100 * s->avctx->b_quant_offset; xvid_plg_data.bquant_offset = 0; // 100 * s->avctx->b_quant_offset;
xvid_plg_data.bquant_ratio = 100; // * s->avctx->b_quant_factor; xvid_plg_data.bquant_ratio = 100; // * s->avctx->b_quant_factor;
#if 0
xvid_plg_data.stats.hlength= X
#endif
if(!s->rc_context.dry_run_qscale){ if(!s->rc_context.dry_run_qscale){
if(s->picture_number){ if(s->picture_number){
xvid_plg_data.length= xvid_plg_data.length=
......
OBJS-$(HAVE_MMI) += mips/dsputil_mmi.o \ MMI-OBJS += mips/dsputil_mmi.o \
mips/idct_mmi.o \ mips/idct_mmi.o \
mips/mpegvideo_mmi.o \ mips/mpegvideo_mmi.o \
...@@ -40,6 +40,7 @@ typedef struct MsrleContext { ...@@ -40,6 +40,7 @@ typedef struct MsrleContext {
AVCodecContext *avctx; AVCodecContext *avctx;
AVFrame frame; AVFrame frame;
GetByteContext gb;
const unsigned char *buf; const unsigned char *buf;
int size; int size;
...@@ -127,7 +128,8 @@ static int msrle_decode_frame(AVCodecContext *avctx, ...@@ -127,7 +128,8 @@ static int msrle_decode_frame(AVCodecContext *avctx,
ptr += s->frame.linesize[0]; ptr += s->frame.linesize[0];
} }
} else { } else {
ff_msrle_decode(avctx, (AVPicture*)&s->frame, avctx->bits_per_coded_sample, buf, buf_size); bytestream2_init(&s->gb, buf, buf_size);
ff_msrle_decode(avctx, (AVPicture*)&s->frame, avctx->bits_per_coded_sample, &s->gb);
} }
*data_size = sizeof(AVFrame); *data_size = sizeof(AVFrame);
......
This diff is collapsed.
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#define AVCODEC_MSRLEDEC_H #define AVCODEC_MSRLEDEC_H
#include "avcodec.h" #include "avcodec.h"
#include "bytestream.h"
/** /**
* Decode stream in MS RLE format into frame. * Decode stream in MS RLE format into frame.
...@@ -30,10 +31,9 @@ ...@@ -30,10 +31,9 @@
* @param avctx codec context * @param avctx codec context
* @param pic destination frame * @param pic destination frame
* @param depth bit depth * @param depth bit depth
* @param data input stream * @param gb input bytestream context
* @param data_size input size
*/ */
int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic, int depth, int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic,
const uint8_t* data, int data_size); int depth, GetByteContext *gb);
#endif /* AVCODEC_MSRLEDEC_H */ #endif /* AVCODEC_MSRLEDEC_H */
...@@ -11,7 +11,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o ...@@ -11,7 +11,7 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o
ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o
ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
OBJS-$(HAVE_ALTIVEC) += ppc/dsputil_altivec.o \ ALTIVEC-OBJS += ppc/dsputil_altivec.o \
ppc/fdct_altivec.o \ ppc/fdct_altivec.o \
ppc/float_altivec.o \ ppc/float_altivec.o \
ppc/fmtconvert_altivec.o \ ppc/fmtconvert_altivec.o \
...@@ -19,4 +19,3 @@ OBJS-$(HAVE_ALTIVEC) += ppc/dsputil_altivec.o \ ...@@ -19,4 +19,3 @@ OBJS-$(HAVE_ALTIVEC) += ppc/dsputil_altivec.o \
ppc/idct_altivec.o \ ppc/idct_altivec.o \
ppc/int_altivec.o \ ppc/int_altivec.o \
ppc/mpegvideo_altivec.o \ ppc/mpegvideo_altivec.o \
$(ALTIVEC-OBJS-yes)
This diff is collapsed.
OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \ VIS-OBJS += sparc/dsputil_vis.o \
sparc/simple_idct_vis.o \ sparc/simple_idct_vis.o \
...@@ -58,6 +58,7 @@ typedef struct TsccContext { ...@@ -58,6 +58,7 @@ typedef struct TsccContext {
unsigned int decomp_size; unsigned int decomp_size;
// Decompression buffer // Decompression buffer
unsigned char* decomp_buf; unsigned char* decomp_buf;
GetByteContext gb;
int height; int height;
z_stream zstream; z_stream zstream;
...@@ -105,8 +106,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac ...@@ -105,8 +106,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
} }
if(zret != Z_DATA_ERROR) if (zret != Z_DATA_ERROR) {
ff_msrle_decode(avctx, (AVPicture*)&c->pic, c->bpp, c->decomp_buf, c->decomp_size - c->zstream.avail_out); bytestream2_init(&c->gb, c->decomp_buf,
c->decomp_size - c->zstream.avail_out);
ff_msrle_decode(avctx, (AVPicture*)&c->pic, c->bpp, &c->gb);
}
/* make the palette available on the way out */ /* make the palette available on the way out */
if (c->avctx->pix_fmt == PIX_FMT_PAL8) { if (c->avctx->pix_fmt == PIX_FMT_PAL8) {
......
...@@ -2,7 +2,7 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o ...@@ -2,7 +2,7 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \ MMX-OBJS += x86/dsputil_mmx.o \
x86/fdct_mmx.o \ x86/fdct_mmx.o \
x86/fmtconvert_mmx.o \ x86/fmtconvert_mmx.o \
x86/idct_mmx_xvid.o \ x86/idct_mmx_xvid.o \
...@@ -74,7 +74,6 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \ ...@@ -74,7 +74,6 @@ YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \
x86/vp56dsp.o x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ YASM-OBJS += x86/dsputil_yasm.o \
x86/deinterlace.o \ x86/deinterlace.o \
x86/fmtconvert.o \ x86/fmtconvert.o \
$(YASM-OBJS-yes)
...@@ -136,10 +136,10 @@ cglobal put_signed_rect_clamped_%1, 5,7,3, dst, dst_stride, src, src_stride, w, ...@@ -136,10 +136,10 @@ cglobal put_signed_rect_clamped_%1, 5,7,3, dst, dst_stride, src, src_stride, w,
and wd, ~(mmsize-1) and wd, ~(mmsize-1)
%if ARCH_X86_64 %if ARCH_X86_64
mov r10d, r5m mov r7d, r5m
mov r11d, wd mov r8d, wd
%define wspill r11d %define wspill r8d
%define hd r10d %define hd r7d
%else %else
mov r4m, wd mov r4m, wd
%define wspill r4m %define wspill r4m
......
...@@ -497,9 +497,9 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset ...@@ -497,9 +497,9 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
%macro EMU_EDGE_FUNC 0 %macro EMU_EDGE_FUNC 0
%if ARCH_X86_64 %if ARCH_X86_64
%define w_reg r10 %define w_reg r7
cglobal emu_edge_core, 6, 7, 1 cglobal emu_edge_core, 6, 9, 1
mov r11, r5 ; save block_h mov r8, r5 ; save block_h
%else %else
%define w_reg r6 %define w_reg r6
cglobal emu_edge_core, 2, 7, 0 cglobal emu_edge_core, 2, 7, 0
...@@ -536,7 +536,7 @@ cglobal emu_edge_core, 2, 7, 0 ...@@ -536,7 +536,7 @@ cglobal emu_edge_core, 2, 7, 0
sub r0, w_reg sub r0, w_reg
%if ARCH_X86_64 %if ARCH_X86_64
mov r3, r0 ; backup of buf+block_h*linesize mov r3, r0 ; backup of buf+block_h*linesize
mov r5, r11 mov r5, r8
%else %else
mov r0m, r0 ; backup of buf+block_h*linesize mov r0m, r0 ; backup of buf+block_h*linesize
mov r5, r5m mov r5, r5m
...@@ -550,7 +550,7 @@ cglobal emu_edge_core, 2, 7, 0 ...@@ -550,7 +550,7 @@ cglobal emu_edge_core, 2, 7, 0
; FIXME we can do a if size == 1 here if that makes any speed difference, test me ; FIXME we can do a if size == 1 here if that makes any speed difference, test me
sar w_reg, 1 sar w_reg, 1
sal w_reg, 6 sal w_reg, 6
; r0=buf+block_h*linesize,r10(64)/r6(32)=start_x offset for funcs ; r0=buf+block_h*linesize,r7(64)/r6(32)=start_x offset for funcs
; r6(rax)/r3(ebx)=val,r2=linesize,r1=start_x,r5=block_h ; r6(rax)/r3(ebx)=val,r2=linesize,r1=start_x,r5=block_h
%ifdef PIC %ifdef PIC
lea rax, [.emuedge_extend_left_2] lea rax, [.emuedge_extend_left_2]
...@@ -560,7 +560,7 @@ cglobal emu_edge_core, 2, 7, 0 ...@@ -560,7 +560,7 @@ cglobal emu_edge_core, 2, 7, 0
%endif %endif
call w_reg call w_reg
; now r3(64)/r0(32)=buf,r2=linesize,r11/r5=block_h,r6/r3=val, r10/r6=end_x, r1=block_w ; now r3(64)/r0(32)=buf,r2=linesize,r8/r5=block_h,r6/r3=val, r7/r6=end_x, r1=block_w
.right_extend: .right_extend:
%if ARCH_X86_32 %if ARCH_X86_32
mov r0, r0m mov r0, r0m
...@@ -591,7 +591,7 @@ cglobal emu_edge_core, 2, 7, 0 ...@@ -591,7 +591,7 @@ cglobal emu_edge_core, 2, 7, 0
%define vall al %define vall al
%define valh ah %define valh ah
%define valw ax %define valw ax
%define valw2 r10w %define valw2 r7w
%define valw3 r3w %define valw3 r3w
%if WIN64 %if WIN64
%define valw4 r4w %define valw4 r4w
...@@ -618,7 +618,7 @@ cglobal emu_edge_core, 2, 7, 0 ...@@ -618,7 +618,7 @@ cglobal emu_edge_core, 2, 7, 0
; - else if (%2 & 8) fills 8 bytes into mm0 ; - else if (%2 & 8) fills 8 bytes into mm0
; - if (%2 & 7 == 4) fills the last 4 bytes into rax ; - if (%2 & 7 == 4) fills the last 4 bytes into rax
; - else if (%2 & 4) fills 4 bytes into mm0-1 ; - else if (%2 & 4) fills 4 bytes into mm0-1
; - if (%2 & 3 == 3) fills 2 bytes into r10/r3, and 1 into eax ; - if (%2 & 3 == 3) fills 2 bytes into r7/r3, and 1 into eax
; (note that we're using r3 for body/bottom because it's a shorter ; (note that we're using r3 for body/bottom because it's a shorter
; opcode, and then the loop fits in 128 bytes) ; opcode, and then the loop fits in 128 bytes)
; - else fills remaining bytes into rax ; - else fills remaining bytes into rax
...@@ -848,7 +848,7 @@ ALIGN 64 ...@@ -848,7 +848,7 @@ ALIGN 64
%endrep %endrep
%endmacro ; LEFT_EXTEND %endmacro ; LEFT_EXTEND
; r3/r0=buf+block_h*linesize, r2=linesize, r11/r5=block_h, r0/r6=end_x, r6/r3=val ; r3/r0=buf+block_h*linesize, r2=linesize, r8/r5=block_h, r0/r6=end_x, r6/r3=val
%macro RIGHT_EXTEND 0 %macro RIGHT_EXTEND 0
%assign %%n 2 %assign %%n 2
%rep 11 %rep 11
...@@ -858,7 +858,7 @@ ALIGN 64 ...@@ -858,7 +858,7 @@ ALIGN 64
sub r3, r2 ; dst -= linesize sub r3, r2 ; dst -= linesize
READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels
WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels
dec r11 dec r8
%else ; ARCH_X86_32 %else ; ARCH_X86_32
sub r0, r2 ; dst -= linesize sub r0, r2 ; dst -= linesize
READ_V_PIXEL %%n, [r0+w_reg-1] ; read pixels READ_V_PIXEL %%n, [r0+w_reg-1] ; read pixels
...@@ -937,11 +937,11 @@ ALIGN 64 ...@@ -937,11 +937,11 @@ ALIGN 64
%macro SLOW_V_EXTEND 0 %macro SLOW_V_EXTEND 0
.slow_v_extend_loop: .slow_v_extend_loop:
; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h ; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h
; r11(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r10(64)/r6(32)=w=end_x-start_x ; r8(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r7(64)/r6(32)=w=end_x-start_x
%if ARCH_X86_64 %if ARCH_X86_64
push r11 ; save old value of block_h push r8 ; save old value of block_h
test r3, r3 test r3, r3
%define cnt_reg r11 %define cnt_reg r8
jz .do_body_copy ; if (!start_y) goto do_body_copy jz .do_body_copy ; if (!start_y) goto do_body_copy
V_COPY_ROW top, r3 V_COPY_ROW top, r3
%else %else
...@@ -955,7 +955,7 @@ ALIGN 64 ...@@ -955,7 +955,7 @@ ALIGN 64
V_COPY_ROW body, r4 V_COPY_ROW body, r4
%if ARCH_X86_64 %if ARCH_X86_64
pop r11 ; restore old value of block_h pop r8 ; restore old value of block_h
%define cnt_reg r3 %define cnt_reg r3
%endif %endif
test r5, r5 test r5, r5
...@@ -974,7 +974,7 @@ ALIGN 64 ...@@ -974,7 +974,7 @@ ALIGN 64
%macro SLOW_LEFT_EXTEND 0 %macro SLOW_LEFT_EXTEND 0
.slow_left_extend_loop: .slow_left_extend_loop:
; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r10/r6=start_x ; r0=buf+block_h*linesize,r2=linesize,r6(64)/r3(32)=val,r5=block_h,r4=cntr,r7/r6=start_x
mov r4, 8 mov r4, 8
sub r0, linesize sub r0, linesize
READ_V_PIXEL 8, [r0+w_reg] READ_V_PIXEL 8, [r0+w_reg]
...@@ -1002,11 +1002,11 @@ ALIGN 64 ...@@ -1002,11 +1002,11 @@ ALIGN 64
%macro SLOW_RIGHT_EXTEND 0 %macro SLOW_RIGHT_EXTEND 0
.slow_right_extend_loop: .slow_right_extend_loop:
; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r11(64)/r5(32)=block_h, ; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r8(64)/r5(32)=block_h,
; r10(64)/r6(32)=end_x,r6/r3=val,r1=cntr ; r7(64)/r6(32)=end_x,r6/r3=val,r1=cntr
%if ARCH_X86_64 %if ARCH_X86_64
%define buf_reg r3 %define buf_reg r3
%define bh_reg r11 %define bh_reg r8
%else %else
%define buf_reg r0 %define buf_reg r0
%define bh_reg r5 %define bh_reg r5
......
...@@ -750,14 +750,11 @@ INIT_XMM ...@@ -750,14 +750,11 @@ INIT_XMM
%endmacro %endmacro
%macro DECL_IMDCT 2 %macro DECL_IMDCT 2
cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input cglobal imdct_half%1, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
%if ARCH_X86_64 %if ARCH_X86_64
%define rrevtab r10 %define rrevtab r7
%define rtcos r11 %define rtcos r8
%define rtsin r12 %define rtsin r9
push r12
push r13
push r14
%else %else
%define rrevtab r6 %define rrevtab r6
%define rtsin r6 %define rtsin r6
...@@ -799,12 +796,12 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * ...@@ -799,12 +796,12 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
%if ARCH_X86_64 %if ARCH_X86_64
movzx r5, word [rrevtab+r4-4] movzx r5, word [rrevtab+r4-4]
movzx r6, word [rrevtab+r4-2] movzx r6, word [rrevtab+r4-2]
movzx r13, word [rrevtab+r3] movzx r10, word [rrevtab+r3]
movzx r14, word [rrevtab+r3+2] movzx r11, word [rrevtab+r3+2]
movlps [r1+r5 *8], xmm0 movlps [r1+r5 *8], xmm0
movhps [r1+r6 *8], xmm0 movhps [r1+r6 *8], xmm0
movlps [r1+r13*8], xmm1 movlps [r1+r10*8], xmm1
movhps [r1+r14*8], xmm1 movhps [r1+r11*8], xmm1
add r4, 4 add r4, 4
%else %else
mov r6, [esp] mov r6, [esp]
...@@ -840,11 +837,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * ...@@ -840,11 +837,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *
mov r1, -mmsize mov r1, -mmsize
sub r1, r0 sub r1, r0
%2 r0, r1, r6, rtcos, rtsin %2 r0, r1, r6, rtcos, rtsin
%if ARCH_X86_64 %if ARCH_X86_64 == 0
pop r14
pop r13
pop r12
%else
add esp, 12 add esp, 12
%endif %endif
%ifidn avx_enabled, 1 %ifidn avx_enabled, 1
......
...@@ -179,9 +179,8 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 ...@@ -179,9 +179,8 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro FLOAT_TO_INT16_INTERLEAVE6 1 %macro FLOAT_TO_INT16_INTERLEAVE6 1
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5 cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64 %if ARCH_X86_64
%define lend r10d
mov lend, r2d mov lend, r2d
%else %else
%define lend dword r2m %define lend dword r2m
...@@ -240,9 +239,8 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2 ...@@ -240,9 +239,8 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro FLOAT_INTERLEAVE6 2 %macro FLOAT_INTERLEAVE6 2
cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5 cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len
%if ARCH_X86_64 %if ARCH_X86_64
%define lend r10d
mov lend, r2d mov lend, r2d
%else %else
%define lend dword r2m %define lend dword r2m
......
...@@ -91,9 +91,22 @@ SECTION .text ...@@ -91,9 +91,22 @@ SECTION .text
%endmacro %endmacro
%macro chroma_mc8_mmx_func 3 %macro chroma_mc8_mmx_func 3
%ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r8
%define rnd_2d_rv40 r8
%define extra_regs 2
%else ; no-PIC
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%define extra_regs 1
%endif ; PIC
%else
%define extra_regs 0
%endif ; rv40
; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, ; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
; int stride, int h, int mx, int my) ; int stride, int h, int mx, int my)
cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
%if ARCH_X86_64 %if ARCH_X86_64
movsxd r2, r2d movsxd r2, r2d
%endif %endif
...@@ -106,19 +119,12 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 ...@@ -106,19 +119,12 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
.at_least_one_non_zero .at_least_one_non_zero
%ifidn %2, rv40 %ifidn %2, rv40
%ifdef PIC
%define rnd_1d_rv40 r11
%define rnd_2d_rv40 r11
%else ; no-PIC
%define rnd_1d_rv40 rnd_rv40_1d_tbl
%define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif
%if ARCH_X86_64 %if ARCH_X86_64
mov r10, r5 mov r7, r5
and r10, 6 ; &~1 for mx/my=[0,7] and r7, 6 ; &~1 for mx/my=[0,7]
lea r10, [r10*4+r4] lea r7, [r7*4+r4]
sar r10d, 1 sar r7d, 1
%define rnd_bias r10 %define rnd_bias r7
%define dest_reg r0 %define dest_reg r0
%else ; x86-32 %else ; x86-32
mov r0, r5 mov r0, r5
...@@ -145,7 +151,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 ...@@ -145,7 +151,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%ifidn %2, rv40 %ifidn %2, rv40
%ifdef PIC %ifdef PIC
lea r11, [rnd_rv40_1d_tbl] lea r8, [rnd_rv40_1d_tbl]
%endif %endif
%if ARCH_X86_64 == 0 %if ARCH_X86_64 == 0
mov r5, r0m mov r5, r0m
...@@ -196,7 +202,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 ...@@ -196,7 +202,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
movd m6, r5d ; y movd m6, r5d ; y
%ifidn %2, rv40 %ifidn %2, rv40
%ifdef PIC %ifdef PIC
lea r11, [rnd_rv40_2d_tbl] lea r8, [rnd_rv40_2d_tbl]
%endif %endif
%if ARCH_X86_64 == 0 %if ARCH_X86_64 == 0
mov r5, r0m mov r5, r0m
...@@ -278,7 +284,13 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 ...@@ -278,7 +284,13 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
%endmacro %endmacro
%macro chroma_mc4_mmx_func 3 %macro chroma_mc4_mmx_func 3
cglobal %1_%2_chroma_mc4_%3, 6, 6, 0 %define extra_regs 0
%ifidn %2, rv40
%ifdef PIC
%define extra_regs 1
%endif ; PIC
%endif ; rv40
cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
%if ARCH_X86_64 %if ARCH_X86_64
movsxd r2, r2d movsxd r2, r2d
%endif %endif
...@@ -296,8 +308,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0 ...@@ -296,8 +308,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
%ifidn %2, rv40 %ifidn %2, rv40
%ifdef PIC %ifdef PIC
lea r11, [rnd_rv40_2d_tbl] lea r6, [rnd_rv40_2d_tbl]
%define rnd_2d_rv40 r11 %define rnd_2d_rv40 r6
%else %else
%define rnd_2d_rv40 rnd_rv40_2d_tbl %define rnd_2d_rv40 rnd_rv40_2d_tbl
%endif %endif
......
...@@ -328,11 +328,11 @@ cglobal deblock_v_luma_8_%1, 5,5,10 ...@@ -328,11 +328,11 @@ cglobal deblock_v_luma_8_%1, 5,5,10
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX INIT_MMX
cglobal deblock_h_luma_8_%1, 5,7 cglobal deblock_h_luma_8_%1, 5,9
movsxd r10, r1d movsxd r7, r1d
lea r11, [r10+r10*2] lea r8, [r7+r7*2]
lea r6, [r0-4] lea r6, [r0-4]
lea r5, [r0-4+r11] lea r5, [r0-4+r8]
%if WIN64 %if WIN64
sub rsp, 0x98 sub rsp, 0x98
%define pix_tmp rsp+0x30 %define pix_tmp rsp+0x30
...@@ -342,14 +342,14 @@ cglobal deblock_h_luma_8_%1, 5,7 ...@@ -342,14 +342,14 @@ cglobal deblock_h_luma_8_%1, 5,7
%endif %endif
; transpose 6x16 -> tmp space ; transpose 6x16 -> tmp space
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp
lea r6, [r6+r10*8] lea r6, [r6+r7*8]
lea r5, [r5+r10*8] lea r5, [r5+r7*8]
TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r10, r11), pix_tmp+8 TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp+8
; vertical filter ; vertical filter
; alpha, beta, tc0 are still in r2d, r3d, r4 ; alpha, beta, tc0 are still in r2d, r3d, r4
; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them ; don't backup r6, r5, r7, r8 because deblock_v_luma_sse2 doesn't use them
lea r0, [pix_tmp+0x30] lea r0, [pix_tmp+0x30]
mov r1d, 0x10 mov r1d, 0x10
%if WIN64 %if WIN64
...@@ -364,17 +364,17 @@ cglobal deblock_h_luma_8_%1, 5,7 ...@@ -364,17 +364,17 @@ cglobal deblock_h_luma_8_%1, 5,7
movq m1, [pix_tmp+0x28] movq m1, [pix_tmp+0x28]
movq m2, [pix_tmp+0x38] movq m2, [pix_tmp+0x38]
movq m3, [pix_tmp+0x48] movq m3, [pix_tmp+0x48]
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
shl r10, 3 shl r7, 3
sub r6, r10 sub r6, r7
sub r5, r10 sub r5, r7
shr r10, 3 shr r7, 3
movq m0, [pix_tmp+0x10] movq m0, [pix_tmp+0x10]
movq m1, [pix_tmp+0x20] movq m1, [pix_tmp+0x20]
movq m2, [pix_tmp+0x30] movq m2, [pix_tmp+0x30]
movq m3, [pix_tmp+0x40] movq m3, [pix_tmp+0x40]
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
%if WIN64 %if WIN64
add rsp, 0x98 add rsp, 0x98
...@@ -709,32 +709,32 @@ INIT_MMX ...@@ -709,32 +709,32 @@ INIT_MMX
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_8_%1, 4,7 cglobal deblock_h_luma_intra_8_%1, 4,9
movsxd r10, r1d movsxd r7, r1d
lea r11, [r10*3] lea r8, [r7*3]
lea r6, [r0-4] lea r6, [r0-4]
lea r5, [r0-4+r11] lea r5, [r0-4+r8]
sub rsp, 0x88 sub rsp, 0x88
%define pix_tmp rsp %define pix_tmp rsp
; transpose 8x16 -> tmp space ; transpose 8x16 -> tmp space
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30) TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
lea r6, [r6+r10*8] lea r6, [r6+r7*8]
lea r5, [r5+r10*8] lea r5, [r5+r7*8]
TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r10, r11), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30) TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
lea r0, [pix_tmp+0x40] lea r0, [pix_tmp+0x40]
mov r1, 0x10 mov r1, 0x10
call deblock_v_luma_intra_8_%1 call deblock_v_luma_intra_8_%1
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
lea r5, [r6+r11] lea r5, [r6+r8]
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
shl r10, 3 shl r7, 3
sub r6, r10 sub r6, r7
sub r5, r10 sub r5, r7
shr r10, 3 shr r7, 3
TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r10, r11) TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
add rsp, 0x88 add rsp, 0x88
RET RET
%else %else
......
This diff is collapsed.
...@@ -29,24 +29,6 @@ SECTION_RODATA ...@@ -29,24 +29,6 @@ SECTION_RODATA
pw_pixel_max: times 8 dw ((1 << 10)-1) pw_pixel_max: times 8 dw ((1 << 10)-1)
pd_32: times 4 dd 32 pd_32: times 4 dd 32
scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
db 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8
db 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8
db 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8
db 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8
db 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8
db 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8
db 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8
db 4+11*8, 5+11*8, 4+12*8, 5+12*8
db 6+11*8, 7+11*8, 6+12*8, 7+12*8
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
%ifdef PIC
%define scan8 r11
%else
%define scan8 scan8_mem
%endif
SECTION .text SECTION .text
...@@ -315,9 +297,9 @@ IDCT_ADD16INTRA_10 avx ...@@ -315,9 +297,9 @@ IDCT_ADD16INTRA_10 avx
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro IDCT_ADD8 1 %macro IDCT_ADD8 1
cglobal h264_idct_add8_10_%1,5,7,7 cglobal h264_idct_add8_10_%1,5,8,7
%if ARCH_X86_64 %if ARCH_X86_64
mov r10, r0 mov r7, r0
%endif %endif
add r2, 1024 add r2, 1024
mov r0, [r0] mov r0, [r0]
...@@ -325,7 +307,7 @@ cglobal h264_idct_add8_10_%1,5,7,7 ...@@ -325,7 +307,7 @@ cglobal h264_idct_add8_10_%1,5,7,7
ADD16_OP_INTRA %1, 18, 4+ 7*8 ADD16_OP_INTRA %1, 18, 4+ 7*8
add r2, 1024-128*2 add r2, 1024-128*2
%if ARCH_X86_64 %if ARCH_X86_64
mov r0, [r10+gprsize] mov r0, [r7+gprsize]
%else %else
mov r0, r0m mov r0, r0m
mov r0, [r0+gprsize] mov r0, [r0+gprsize]
......
...@@ -289,7 +289,7 @@ cglobal pred16x16_tm_vp8_sse2, 2,6,6 ...@@ -289,7 +289,7 @@ cglobal pred16x16_tm_vp8_sse2, 2,6,6
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro H264_PRED16x16_PLANE 3 %macro H264_PRED16x16_PLANE 3
cglobal pred16x16_plane_%3_%1, 2, 7, %2 cglobal pred16x16_plane_%3_%1, 2, 9, %2
mov r2, r1 ; +stride mov r2, r1 ; +stride
neg r1 ; -stride neg r1 ; -stride
...@@ -349,7 +349,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 ...@@ -349,7 +349,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
add r4, r2 add r4, r2
%if ARCH_X86_64 %if ARCH_X86_64
%define e_reg r11 %define e_reg r8
%else %else
%define e_reg r0 %define e_reg r0
%endif %endif
...@@ -370,8 +370,8 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 ...@@ -370,8 +370,8 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx e_reg, byte [r3 ] movzx e_reg, byte [r3 ]
%if ARCH_X86_64 %if ARCH_X86_64
movzx r10, byte [r4+r2 ] movzx r7, byte [r4+r2 ]
sub r10, e_reg sub r7, e_reg
%else %else
movzx r6, byte [r4+r2 ] movzx r6, byte [r4+r2 ]
sub r6, e_reg sub r6, e_reg
...@@ -386,7 +386,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 ...@@ -386,7 +386,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx r6, byte [r3 ] movzx r6, byte [r3 ]
sub r6, r4 sub r6, r4
%if ARCH_X86_64 %if ARCH_X86_64
lea r6, [r10+r6*2] lea r6, [r7+r6*2]
lea r5, [r5+r6*2] lea r5, [r5+r6*2]
add r5, r6 add r5, r6
%else %else
...@@ -396,9 +396,9 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 ...@@ -396,9 +396,9 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx r4, byte [e_reg ] movzx r4, byte [e_reg ]
%if ARCH_X86_64 %if ARCH_X86_64
movzx r10, byte [r3 +r2 ] movzx r7, byte [r3 +r2 ]
sub r10, r4 sub r7, r4
sub r5, r10 sub r5, r7
%else %else
movzx r6, byte [r3 +r2 ] movzx r6, byte [r3 +r2 ]
sub r6, r4 sub r6, r4
...@@ -410,7 +410,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 ...@@ -410,7 +410,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2
movzx r6, byte [r3 +r2*2] movzx r6, byte [r3 +r2*2]
sub r6, r4 sub r6, r4
%if ARCH_X86_64 %if ARCH_X86_64
add r6, r10 add r6, r7
%endif %endif
lea r5, [r5+r6*8] lea r5, [r5+r6*8]
...@@ -588,7 +588,7 @@ H264_PRED16x16_PLANE ssse3, 8, svq3 ...@@ -588,7 +588,7 @@ H264_PRED16x16_PLANE ssse3, 8, svq3
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro H264_PRED8x8_PLANE 2 %macro H264_PRED8x8_PLANE 2
cglobal pred8x8_plane_%1, 2, 7, %2 cglobal pred8x8_plane_%1, 2, 9, %2
mov r2, r1 ; +stride mov r2, r1 ; +stride
neg r1 ; -stride neg r1 ; -stride
...@@ -642,7 +642,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 ...@@ -642,7 +642,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
add r4, r2 add r4, r2
%if ARCH_X86_64 %if ARCH_X86_64
%define e_reg r11 %define e_reg r8
%else %else
%define e_reg r0 %define e_reg r0
%endif %endif
...@@ -653,9 +653,9 @@ cglobal pred8x8_plane_%1, 2, 7, %2 ...@@ -653,9 +653,9 @@ cglobal pred8x8_plane_%1, 2, 7, %2
movzx e_reg, byte [r3 ] movzx e_reg, byte [r3 ]
%if ARCH_X86_64 %if ARCH_X86_64
movzx r10, byte [r4+r2 ] movzx r7, byte [r4+r2 ]
sub r10, e_reg sub r7, e_reg
sub r5, r10 sub r5, r7
%else %else
movzx r6, byte [r4+r2 ] movzx r6, byte [r4+r2 ]
sub r6, e_reg sub r6, e_reg
...@@ -667,7 +667,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 ...@@ -667,7 +667,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2
movzx r6, byte [r4+r2*2 ] movzx r6, byte [r4+r2*2 ]
sub r6, e_reg sub r6, e_reg
%if ARCH_X86_64 %if ARCH_X86_64
add r6, r10 add r6, r7
%endif %endif
lea r5, [r5+r6*4] lea r5, [r5+r6*4]
......
...@@ -121,8 +121,8 @@ MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8 ...@@ -121,8 +121,8 @@ MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
%endmacro %endmacro
%macro MCAxA_OP 8 %macro MCAxA_OP 8
cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
%if ARCH_X86_32 %if ARCH_X86_32
cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
mov r0, r0m mov r0, r0m
mov r1, r1m mov r1, r1m
...@@ -141,17 +141,19 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8 ...@@ -141,17 +141,19 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
RET RET
%else ; ARCH_X86_64 %else ; ARCH_X86_64
mov r10, r0 cglobal %2_h264_qpel%5_%3_10_%1, %6,%7 + 2,%8
mov r11, r1 mov r%7, r0
%assign p1 %7+1
mov r %+ p1, r1
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
lea r0, [r10+%4*2] lea r0, [r%7+%4*2]
lea r1, [r11+%4*2] lea r1, [r %+ p1+%4*2]
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
lea r0, [r10+r2*%4] lea r0, [r%7+r2*%4]
lea r1, [r11+r2*%4] lea r1, [r %+ p1+r2*%4]
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
lea r0, [r10+r2*%4+%4*2] lea r0, [r%7+r2*%4+%4*2]
lea r1, [r11+r2*%4+%4*2] lea r1, [r %+ p1+r2*%4+%4*2]
%if UNIX64 == 0 ; fall through to function %if UNIX64 == 0 ; fall through to function
call stub_%2_h264_qpel%4_%3_10_%1 call stub_%2_h264_qpel%4_%3_10_%1
RET RET
......
...@@ -127,7 +127,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 ...@@ -127,7 +127,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SETUP 0 %macro BIWEIGHT_SETUP 0
%if ARCH_X86_64 %if ARCH_X86_64
%define off_regd r11d %define off_regd r7d
%else %else
%define off_regd r3d %define off_regd r3d
%endif %endif
...@@ -175,7 +175,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 ...@@ -175,7 +175,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
%endmacro %endmacro
INIT_MMX INIT_MMX
cglobal h264_biweight_16_mmx2, 7, 7, 0 cglobal h264_biweight_16_mmx2, 7, 8, 0
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
.nextrow .nextrow
...@@ -194,7 +194,7 @@ cglobal h264_biweight_16_mmx2, 7, 7, 0 ...@@ -194,7 +194,7 @@ cglobal h264_biweight_16_mmx2, 7, 7, 0
REP_RET REP_RET
%macro BIWEIGHT_FUNC_MM 3 %macro BIWEIGHT_FUNC_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2 cglobal h264_biweight_%1_%3, 7, 8, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
.nextrow .nextrow
...@@ -215,7 +215,7 @@ INIT_XMM ...@@ -215,7 +215,7 @@ INIT_XMM
BIWEIGHT_FUNC_MM 16, 8, sse2 BIWEIGHT_FUNC_MM 16, 8, sse2
%macro BIWEIGHT_FUNC_HALF_MM 3 %macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2 cglobal h264_biweight_%1_%3, 7, 8, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
sar r3, 1 sar r3, 1
...@@ -245,7 +245,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 ...@@ -245,7 +245,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SSSE3_SETUP 0 %macro BIWEIGHT_SSSE3_SETUP 0
%if ARCH_X86_64 %if ARCH_X86_64
%define off_regd r11d %define off_regd r7d
%else %else
%define off_regd r3d %define off_regd r3d
%endif %endif
...@@ -284,7 +284,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 ...@@ -284,7 +284,7 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
%endmacro %endmacro
INIT_XMM INIT_XMM
cglobal h264_biweight_16_ssse3, 7, 7, 8 cglobal h264_biweight_16_ssse3, 7, 8, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
...@@ -303,7 +303,7 @@ cglobal h264_biweight_16_ssse3, 7, 7, 8 ...@@ -303,7 +303,7 @@ cglobal h264_biweight_16_ssse3, 7, 7, 8
REP_RET REP_RET
INIT_XMM INIT_XMM
cglobal h264_biweight_8_ssse3, 7, 7, 8 cglobal h264_biweight_8_ssse3, 7, 8, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
movifnidn r3d, r3m movifnidn r3d, r3m
sar r3, 1 sar r3, 1
......
...@@ -57,6 +57,18 @@ AVOutputFormat ff_adx_muxer = { ...@@ -57,6 +57,18 @@ AVOutputFormat ff_adx_muxer = {
}; };
#endif #endif
#if CONFIG_CAVSVIDEO_MUXER
AVOutputFormat ff_cavsvideo_muxer = {
.name = "cavsvideo",
.long_name = NULL_IF_CONFIG_SMALL("raw Chinese AVS video"),
.extensions = "cavs",
.audio_codec = CODEC_ID_NONE,
.video_codec = CODEC_ID_CAVS,
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
};
#endif
#if CONFIG_DIRAC_MUXER #if CONFIG_DIRAC_MUXER
AVOutputFormat ff_dirac_muxer = { AVOutputFormat ff_dirac_muxer = {
.name = "dirac", .name = "dirac",
...@@ -171,18 +183,6 @@ AVOutputFormat ff_h264_muxer = { ...@@ -171,18 +183,6 @@ AVOutputFormat ff_h264_muxer = {
}; };
#endif #endif
#if CONFIG_CAVSVIDEO_MUXER
AVOutputFormat ff_cavsvideo_muxer = {
.name = "cavsvideo",
.long_name = NULL_IF_CONFIG_SMALL("raw Chinese AVS video"),
.extensions = "cavs",
.audio_codec = CODEC_ID_NONE,
.video_codec = CODEC_ID_CAVS,
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
};
#endif
#if CONFIG_M4V_MUXER #if CONFIG_M4V_MUXER
AVOutputFormat ff_m4v_muxer = { AVOutputFormat ff_m4v_muxer = {
.name = "m4v", .name = "m4v",
...@@ -220,30 +220,6 @@ AVOutputFormat ff_mlp_muxer = { ...@@ -220,30 +220,6 @@ AVOutputFormat ff_mlp_muxer = {
}; };
#endif #endif
#if CONFIG_SRT_MUXER
AVOutputFormat ff_srt_muxer = {
.name = "srt",
.long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle format"),
.mime_type = "application/x-subrip",
.extensions = "srt",
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
.subtitle_codec = CODEC_ID_SRT,
};
#endif
#if CONFIG_TRUEHD_MUXER
AVOutputFormat ff_truehd_muxer = {
.name = "truehd",
.long_name = NULL_IF_CONFIG_SMALL("raw TrueHD"),
.extensions = "thd",
.audio_codec = CODEC_ID_TRUEHD,
.video_codec = CODEC_ID_NONE,
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
};
#endif
#if CONFIG_MPEG1VIDEO_MUXER #if CONFIG_MPEG1VIDEO_MUXER
AVOutputFormat ff_mpeg1video_muxer = { AVOutputFormat ff_mpeg1video_muxer = {
.name = "mpeg1video", .name = "mpeg1video",
...@@ -280,3 +256,27 @@ AVOutputFormat ff_rawvideo_muxer = { ...@@ -280,3 +256,27 @@ AVOutputFormat ff_rawvideo_muxer = {
.flags = AVFMT_NOTIMESTAMPS, .flags = AVFMT_NOTIMESTAMPS,
}; };
#endif #endif
#if CONFIG_SRT_MUXER
AVOutputFormat ff_srt_muxer = {
.name = "srt",
.long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle format"),
.mime_type = "application/x-subrip",
.extensions = "srt",
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
.subtitle_codec = CODEC_ID_SRT,
};
#endif
#if CONFIG_TRUEHD_MUXER
AVOutputFormat ff_truehd_muxer = {
.name = "truehd",
.long_name = NULL_IF_CONFIG_SMALL("raw TrueHD"),
.extensions = "thd",
.audio_codec = CODEC_ID_TRUEHD,
.video_codec = CODEC_ID_NONE,
.write_packet = ff_raw_write_packet,
.flags = AVFMT_NOTIMESTAMPS,
};
#endif
This diff is collapsed.
...@@ -17,14 +17,14 @@ OBJS = input.o \ ...@@ -17,14 +17,14 @@ OBJS = input.o \
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \ OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
bfin/swscale_bfin.o \ bfin/swscale_bfin.o \
bfin/yuv2rgb_bfin.o bfin/yuv2rgb_bfin.o
OBJS-$(HAVE_ALTIVEC) += ppc/swscale_altivec.o \ ALTIVEC-OBJS += ppc/swscale_altivec.o \
ppc/yuv2rgb_altivec.o \ ppc/yuv2rgb_altivec.o \
ppc/yuv2yuv_altivec.o ppc/yuv2yuv_altivec.o
OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \ MMX-OBJS += x86/rgb2rgb.o \
x86/swscale_mmx.o \ x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o VIS-OBJS += sparc/yuv2rgb_vis.o
MMX-OBJS-$(HAVE_YASM) += x86/input.o \ YASM-OBJS += x86/input.o \
x86/output.o \ x86/output.o \
x86/scale.o x86/scale.o
......
...@@ -62,11 +62,11 @@ SECTION .text ...@@ -62,11 +62,11 @@ SECTION .text
%define cntr_reg fltsizeq %define cntr_reg fltsizeq
%define movsx mov %define movsx mov
%else %else
%define cntr_reg r11 %define cntr_reg r7
%define movsx movsxd %define movsx movsxd
%endif %endif
cglobal yuv2planeX_%1, %3, 7, %2, filter, fltsize, src, dst, w, dither, offset cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%if %1 == 8 || %1 == 9 || %1 == 10 %if %1 == 8 || %1 == 9 || %1 == 10
pxor m6, m6 pxor m6, m6
%endif ; %1 == 8/9/10 %endif ; %1 == 8/9/10
......
...@@ -53,7 +53,7 @@ SECTION .text ...@@ -53,7 +53,7 @@ SECTION .text
%ifnidn %3, X %ifnidn %3, X
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1 cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
%else %else
cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
%endif %endif
%if ARCH_X86_64 %if ARCH_X86_64
movsxd wq, wd movsxd wq, wd
...@@ -245,10 +245,9 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz ...@@ -245,10 +245,9 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
%define dlt 0 %define dlt 0
%endif ; %4 ==/!= X4 %endif ; %4 ==/!= X4
%if ARCH_X86_64 %if ARCH_X86_64
push r12 %define srcq r8
%define srcq r11 %define pos1q r7
%define pos1q r10 %define srcendq r9
%define srcendq r12
movsxd fltsizeq, fltsized ; filterSize movsxd fltsizeq, fltsized ; filterSize
lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4] lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
%else ; x86-32 %else ; x86-32
...@@ -388,16 +387,7 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz ...@@ -388,16 +387,7 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
add wq, 2 add wq, 2
%endif ; %3 ==/!= X %endif ; %3 ==/!= X
jl .loop jl .loop
%ifnidn %3, X
REP_RET REP_RET
%else ; %3 == X
%if ARCH_X86_64
pop r12
RET
%else ; x86-32
REP_RET
%endif ; x86-32/64
%endif ; %3 ==/!= X
%endmacro %endmacro
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm ; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment