Commit 18d0a16f authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  swscale: make yuv2yuv1 use named registers.
  h264: mark h264_idct_add8_10 with number of XMM registers.
  swscale: fix V plane memory location in bilinear/unscaled RGB/YUYV case.
  vp8: always update next_framep[] before returning from decode_frame().
  avconv: estimate next_dts from framerate if it is set.
  avconv: better next_dts usage.
  avconv: rename InputStream.pts to last_dts.
  avconv: reduce overloading for InputStream.pts.
  avconv: rename InputStream.next_pts to next_dts.
  avconv: rework -t handling for encoding.
  avconv: set encoder timebase for subtitles.
  pva-demux test: add -vn
  swscale: K&R formatting cosmetics for SPARC code
  apedec: allow the user to set the maximum number of output samples per call
  apedec: do not unnecessarily zero output samples for mono frames
  apedec: allocate a single flat buffer for decoded samples
  apedec: use sizeof(field) instead of sizeof(type)
  swscale: split C output functions into separate file.
  swscale: Split C input functions into separate file.
  bytestream: Add bytestream2 writing API.

The avconv changes are not merged yet because of massive regressions and bugs.

Conflicts:
	ffmpeg.c
	libavcodec/vp8.c
	libswscale/swscale.c
	libswscale/x86/swscale_template.c
	tests/fate/demux.mak
	tests/ref/lavf/asf
	tests/ref/lavf/avi
	tests/ref/lavf/mkv
	tests/ref/lavf/mpg
	tests/ref/lavf/nut
	tests/ref/lavf/ogg
	tests/ref/lavf/rm
	tests/ref/lavf/ts
	tests/ref/seek/lavf_avi
	tests/ref/seek/lavf_mkv
	tests/ref/seek/lavf_rm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 950930b4 ef1c785f
......@@ -2616,6 +2616,7 @@ static int transcode_init(OutputFile *output_files, int nb_output_files,
#endif
break;
case AVMEDIA_TYPE_SUBTITLE:
codec->time_base = (AVRational){1, 1000};
break;
default:
abort();
......
This diff is collapsed.
/*
* Bytestream functions
* copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
* Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
*
* This file is part of FFmpeg.
*
......@@ -30,6 +31,11 @@ typedef struct {
const uint8_t *buffer, *buffer_end, *buffer_start;
} GetByteContext;
/**
 * Cursor over a writable byte buffer, used by the bytestream2 put/seek/skip
 * helpers.
 */
typedef struct {
    uint8_t *buffer;       /* current write position */
    uint8_t *buffer_end;   /* one past the last writable byte */
    uint8_t *buffer_start; /* first byte of the underlying buffer */
    int eof;               /* set to 1 once a request did not fit in the buffer */
} PutByteContext;
#define DEF_T(type, name, bytes, read, write) \
static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
(*b) += bytes;\
......@@ -39,6 +45,17 @@ static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type valu
write(*b, value);\
(*b) += bytes;\
}\
static av_always_inline void bytestream2_put_ ## name ## u(PutByteContext *p, const type value)\
{\
bytestream_put_ ## name(&p->buffer, value);\
}\
static av_always_inline void bytestream2_put_ ## name(PutByteContext *p, const type value){\
if (!p->eof && (p->buffer_end - p->buffer >= bytes)) {\
write(p->buffer, value);\
p->buffer += bytes;\
} else\
p->eof = 1;\
}\
static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g)\
{\
return bytestream_get_ ## name(&g->buffer);\
......@@ -119,22 +136,53 @@ static av_always_inline void bytestream2_init(GetByteContext *g,
g->buffer_end = buf + buf_size;
}
/**
 * Prepare a PutByteContext for writing into buf.
 *
 * The write cursor and the start marker are set to buf, the end marker to
 * buf + buf_size, and the eof flag is cleared.
 */
static av_always_inline void bytestream2_init_writer(PutByteContext *p,
                                                     uint8_t *buf, int buf_size)
{
    uint8_t *const limit = buf + buf_size;

    p->eof          = 0;
    p->buffer_start = buf;
    p->buffer_end   = limit;
    p->buffer       = buf;
}
/**
 * Return the distance in bytes from the read cursor to the end of the buffer.
 */
static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g)
{
    const uint8_t *const cursor = g->buffer;

    return g->buffer_end - cursor;
}
/**
 * Return the distance in bytes from the write cursor to the end of the buffer.
 */
static av_always_inline unsigned int bytestream2_get_bytes_left_p(PutByteContext *p)
{
    uint8_t *const cursor = p->buffer;

    return p->buffer_end - cursor;
}
/**
 * Advance the read cursor by size bytes, stopping at the end of the buffer
 * when fewer than size bytes remain.
 */
static av_always_inline void bytestream2_skip(GetByteContext *g,
                                              unsigned int size)
{
    if (g->buffer_end - g->buffer > size)
        g->buffer += size;          /* enough data left: take the full step */
    else
        g->buffer = g->buffer_end;  /* otherwise park at the end */
}
/**
 * Advance the write cursor by size bytes without writing anything.
 *
 * Does nothing once the eof flag is set. When fewer than size bytes remain,
 * the cursor moves to the end of the buffer and the eof flag is raised.
 */
static av_always_inline void bytestream2_skip_p(PutByteContext *p,
                                                unsigned int size)
{
    int advance;

    if (!p->eof) {
        advance = FFMIN(p->buffer_end - p->buffer, size);
        if (advance != size)
            p->eof = 1;
        p->buffer += advance;
    }
}
/**
 * Return the read cursor position as an offset from the start of the buffer.
 */
static av_always_inline int bytestream2_tell(GetByteContext *g)
{
    const int position = (int)(g->buffer - g->buffer_start);

    return position;
}
/**
 * Return the write cursor position as an offset from the start of the buffer.
 */
static av_always_inline int bytestream2_tell_p(PutByteContext *p)
{
    const int position = (int)(p->buffer - p->buffer_start);

    return position;
}
static av_always_inline int bytestream2_seek(GetByteContext *g, int offset,
int whence)
{
......@@ -158,6 +206,36 @@ static av_always_inline int bytestream2_seek(GetByteContext *g, int offset,
return bytestream2_tell(g);
}
/**
 * Move the write cursor of a PutByteContext.
 *
 * @param p      writer context
 * @param offset displacement in bytes, interpreted according to whence
 * @param whence SEEK_SET (relative to the buffer start), SEEK_CUR (relative
 *               to the current position) or SEEK_END (relative to the
 *               buffer end)
 * @return the new position relative to the buffer start, or
 *         AVERROR(EINVAL) if whence is none of the values above
 *
 * The target position is clamped to the buffer. A valid seek rewrites the
 * eof flag: it is set when the request pointed past the end of the buffer
 * and cleared otherwise (a request before the start is clamped without
 * raising eof). An invalid whence leaves the context untouched, so a
 * previously recorded overflow is not lost on a failed call.
 */
static av_always_inline int bytestream2_seek_p(PutByteContext *p, int offset,
                                               int whence)
{
    uint8_t *target;
    int past_end;

    switch (whence) {
    case SEEK_CUR:
        past_end = p->buffer_end - p->buffer < offset;
        offset   = av_clip(offset, -(p->buffer - p->buffer_start),
                           p->buffer_end - p->buffer);
        target   = p->buffer + offset;
        break;
    case SEEK_END:
        past_end = offset > 0;
        offset   = av_clip(offset, -(p->buffer_end - p->buffer_start), 0);
        target   = p->buffer_end + offset;
        break;
    case SEEK_SET:
        past_end = p->buffer_end - p->buffer_start < offset;
        offset   = av_clip(offset, 0, p->buffer_end - p->buffer_start);
        target   = p->buffer_start + offset;
        break;
    default:
        /* reject the call before modifying *p */
        return AVERROR(EINVAL);
    }

    /* commit only after whence has been validated */
    p->eof    = past_end;
    p->buffer = target;
    return bytestream2_tell_p(p);
}
static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
uint8_t *dst,
unsigned int size)
......@@ -168,6 +246,40 @@ static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
return size2;
}
/**
 * Copy up to size bytes from src to the write cursor.
 *
 * Nothing is written once the eof flag is set. When fewer than size bytes
 * remain, only the bytes that fit are copied and the eof flag is raised.
 *
 * @return the number of bytes actually copied
 */
static av_always_inline unsigned int bytestream2_put_buffer(PutByteContext *p,
                                                            const uint8_t *src,
                                                            unsigned int size)
{
    int written = 0;

    if (!p->eof) {
        written = FFMIN(p->buffer_end - p->buffer, size);
        if (written != size)
            p->eof = 1;
        memcpy(p->buffer, src, written);
        p->buffer += written;
    }
    return written;
}
/**
 * Fill up to size bytes at the write cursor with the value c.
 *
 * Nothing is written once the eof flag is set. When fewer than size bytes
 * remain, only the bytes that fit are filled and the eof flag is raised.
 */
static av_always_inline void bytestream2_set_buffer(PutByteContext *p,
                                                    const uint8_t c,
                                                    unsigned int size)
{
    int filled;

    if (!p->eof) {
        filled = FFMIN(p->buffer_end - p->buffer, size);
        if (filled != size)
            p->eof = 1;
        memset(p->buffer, c, filled);
        p->buffer += filled;
    }
}
/**
 * Return the eof flag of the writer, i.e. whether an earlier request did not
 * fit in the buffer.
 */
static av_always_inline unsigned int bytestream2_get_eof(PutByteContext *p)
{
    const unsigned int flag = p->eof;

    return flag;
}
static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
{
memcpy(dst, *b, size);
......
......@@ -1561,18 +1561,6 @@ static void release_queued_segmaps(VP8Context *s, int is_close)
s->maps_are_invalid = 0;
}
/**
 * Prepare the decoder state for skipping the current frame.
 * The frame is marked invisible and dropped from the pending reference
 * buffers; the previous-frame slot is dropped too when update_last is set.
 */
static void skipframe_clear(VP8Context *s)
{
    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = NULL;

    s->next_framep[VP56_FRAME_CURRENT] = NULL;
    s->invisible = 1;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt)
{
......@@ -1584,7 +1572,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret;
goto err;
prev_frame = s->framep[VP56_FRAME_CURRENT];
......@@ -1594,6 +1582,11 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
skip_thresh = !referenced ? AVDISCARD_NONREF :
!s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
if (avctx->skip_frame >= skip_thresh) {
s->invisible = 1;
memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
goto skip_decode;
}
s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
// release no longer referenced frames
......@@ -1618,6 +1611,27 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
abort();
}
if (curframe->data[0])
vp8_release_frame(s, curframe, 1, 0);
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decode on a keyframe. So just don't display anything rather than junk.
if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
!s->framep[VP56_FRAME_GOLDEN] ||
!s->framep[VP56_FRAME_GOLDEN2])) {
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
ret = AVERROR_INVALIDDATA;
goto err;
}
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
goto err;
}
// check if golden and altref are swapped
if (s->update_altref != VP56_FRAME_NONE) {
......@@ -1637,36 +1651,6 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
}
s->next_framep[VP56_FRAME_CURRENT] = curframe;
if (avctx->skip_frame >= skip_thresh) {
skipframe_clear(s);
ret = avpkt->size;
goto skip_decode;
}
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decode on a keyframe. So just don't display anything rather than junk.
if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
!s->framep[VP56_FRAME_GOLDEN] ||
!s->framep[VP56_FRAME_GOLDEN2])) {
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
skipframe_clear(s);
ret = AVERROR_INVALIDDATA;
goto skip_decode;
}
if (curframe->data[0])
vp8_release_frame(s, curframe, 1, 0);
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
skipframe_clear(s);
goto skip_decode;
}
ff_thread_finish_setup(avctx);
s->linesize = curframe->linesize[0];
......@@ -1778,20 +1762,22 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
}
ff_thread_report_progress(curframe, INT_MAX, 0);
ret = avpkt->size;
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
skip_decode:
// if future frames don't use the updated probabilities,
// reset them to the values we saved
if (!s->update_probabilities)
s->prob[0] = s->prob[1];
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
if (!s->invisible) {
*(AVFrame*)data = *curframe;
*data_size = sizeof(AVFrame);
}
return avpkt->size;
err:
memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
return ret;
}
......
......@@ -315,7 +315,7 @@ IDCT_ADD16INTRA_10 avx
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
%macro IDCT_ADD8 1
cglobal h264_idct_add8_10_%1,5,7
cglobal h264_idct_add8_10_%1,5,7,7
%if ARCH_X86_64
mov r10, r0
%endif
......
......@@ -5,8 +5,8 @@ FFLIBS = avutil
HEADERS = swscale.h
OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o \
swscale_unscaled.o
OBJS = input.o options.o output.o rgb2rgb.o swscale.o \
swscale_unscaled.o utils.o yuv2rgb.o
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
bfin/swscale_bfin.o \
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -359,8 +359,8 @@ typedef struct SwsContext {
#define V_TEMP "11*8+4*4*256*2+32"
#define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56"
#define UV_OFF_PX "11*8+4*4*256*3+48"
#define UV_OFF_BYTE "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+80"
......@@ -706,6 +706,14 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c);
*/
SwsFunc ff_getSwsFunc(SwsContext *c);
void ff_sws_init_input_funcs(SwsContext *c);
void ff_sws_init_output_funcs(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,
yuv2interleavedX_fn *yuv2nv12cX,
yuv2packed1_fn *yuv2packed1,
yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX);
void ff_sws_init_swScale_altivec(SwsContext *c);
void ff_sws_init_swScale_mmx(SwsContext *c);
......
......@@ -45,6 +45,102 @@
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
/*
 * Eight 8x8 matrices of byte values, selected by the first index.
 * The value range grows with that index: matrix 0 holds 0..1, matrix 1
 * 0..3, matrix 2 0..7, matrix 3 0..15, matrix 4 0..31, matrices 5 and 6
 * 0..63 (these two are identical), and matrix 7 holds even values 0..126.
 * NOTE(review): presumably ordered-dither patterns; the users of this
 * table are not visible here -- confirm the indexing convention there.
 */
DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
{
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
},{
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
},{
{ 2, 4, 3, 5, 2, 4, 3, 5,},
{ 6, 0, 7, 1, 6, 0, 7, 1,},
{ 3, 5, 2, 4, 3, 5, 2, 4,},
{ 7, 1, 6, 0, 7, 1, 6, 0,},
{ 2, 4, 3, 5, 2, 4, 3, 5,},
{ 6, 0, 7, 1, 6, 0, 7, 1,},
{ 3, 5, 2, 4, 3, 5, 2, 4,},
{ 7, 1, 6, 0, 7, 1, 6, 0,},
},{
{ 4, 8, 7, 11, 4, 8, 7, 11,},
{ 12, 0, 15, 3, 12, 0, 15, 3,},
{ 6, 10, 5, 9, 6, 10, 5, 9,},
{ 14, 2, 13, 1, 14, 2, 13, 1,},
{ 4, 8, 7, 11, 4, 8, 7, 11,},
{ 12, 0, 15, 3, 12, 0, 15, 3,},
{ 6, 10, 5, 9, 6, 10, 5, 9,},
{ 14, 2, 13, 1, 14, 2, 13, 1,},
},{
{ 9, 17, 15, 23, 8, 16, 14, 22,},
{ 25, 1, 31, 7, 24, 0, 30, 6,},
{ 13, 21, 11, 19, 12, 20, 10, 18,},
{ 29, 5, 27, 3, 28, 4, 26, 2,},
{ 8, 16, 14, 22, 9, 17, 15, 23,},
{ 24, 0, 30, 6, 25, 1, 31, 7,},
{ 12, 20, 10, 18, 13, 21, 11, 19,},
{ 28, 4, 26, 2, 29, 5, 27, 3,},
},{
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50, 2, 62, 14, 49, 1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54, 6, 57, 9, 53, 5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48, 0, 60, 12, 51, 3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56, 8, 52, 4, 59, 11, 55, 7,},
},{
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50, 2, 62, 14, 49, 1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54, 6, 57, 9, 53, 5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48, 0, 60, 12, 51, 3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56, 8, 52, 4, 59, 11, 55, 7,},
},{
{ 36, 68, 60, 92, 34, 66, 58, 90,},
{ 100, 4,124, 28, 98, 2,122, 26,},
{ 52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{ 32, 64, 56, 88, 38, 70, 62, 94,},
{ 96, 0,120, 24,102, 6,126, 30,},
{ 48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104, 8,118, 22,110, 14,},
}};
/* Eight-entry constant row in which every element is 64.
 * NOTE(review): presumably a flat dither row; its users are not visible here. */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
/*
 * 15 x 16 table of 16-bit scale values.
 * NOTE(review): the meaning of the row and column indices is not visible
 * in this file section -- confirm against the code that reads this table.
 */
const uint16_t dither_scale[15][16]={
{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
};
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
{
......
......@@ -275,17 +275,17 @@ yuv2planeX_fn 10, 7, 5
%macro yuv2plane1_mainloop 2
.loop_%2:
%if %1 == 8
paddsw m0, m2, [r0+r2*2+mmsize*0]
paddsw m1, m3, [r0+r2*2+mmsize*1]
paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
paddsw m1, m3, [srcq+dstwq*2+mmsize*1]
psraw m0, 7
psraw m1, 7
packuswb m0, m1
mov%2 [r1+r2], m0
%elif %1 == 16
paddd m0, m4, [r0+r2*4+mmsize*0]
paddd m1, m4, [r0+r2*4+mmsize*1]
paddd m2, m4, [r0+r2*4+mmsize*2]
paddd m3, m4, [r0+r2*4+mmsize*3]
paddd m0, m4, [srcq+dstwq*4+mmsize*0]
paddd m1, m4, [srcq+dstwq*4+mmsize*1]
paddd m2, m4, [srcq+dstwq*4+mmsize*2]
paddd m3, m4, [srcq+dstwq*4+mmsize*3]
psrad m0, 3
psrad m1, 3
psrad m2, 3
......@@ -299,46 +299,46 @@ yuv2planeX_fn 10, 7, 5
paddw m0, m5
paddw m2, m5
%endif ; mmx/sse2/sse4/avx
mov%2 [r1+r2*2], m0
mov%2 [r1+r2*2+mmsize], m2
%else
paddsw m0, m2, [r0+r2*2+mmsize*0]
paddsw m1, m2, [r0+r2*2+mmsize*1]
mov%2 [dstq+dstwq*2+mmsize*0], m0
mov%2 [dstq+dstwq*2+mmsize*1], m2
%else ; %1 == 9/10
paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
paddsw m1, m2, [srcq+dstwq*2+mmsize*1]
psraw m0, 15 - %1
psraw m1, 15 - %1
pmaxsw m0, m4
pmaxsw m1, m4
pminsw m0, m3
pminsw m1, m3
mov%2 [r1+r2*2], m0
mov%2 [r1+r2*2+mmsize], m1
mov%2 [dstq+dstwq*2+mmsize*0], m0
mov%2 [dstq+dstwq*2+mmsize*1], m1
%endif
add r2, mmsize
add dstwq, mmsize
jl .loop_%2
%endmacro
%macro yuv2plane1_fn 3
cglobal yuv2plane1_%1, %3, %3, %2
add r2, mmsize - 1
and r2, ~(mmsize - 1)
cglobal yuv2plane1_%1, %3, %3, %2, src, dst, dstw, dither, offset
add dstwq, mmsize - 1
and dstwq, ~(mmsize - 1)
%if %1 == 8
add r1, r2
add dstq, dstwq
%else ; %1 != 8
lea r1, [r1+r2*2]
lea dstq, [dstq+dstwq*2]
%endif ; %1 == 8
%if %1 == 16
lea r0, [r0+r2*4]
lea srcq, [srcq+dstwq*4]
%else ; %1 != 16
lea r0, [r0+r2*2]
lea srcq, [srcq+dstwq*2]
%endif ; %1 == 16
neg r2
neg dstwq
%if %1 == 8
pxor m4, m4 ; zero
; create registers holding dither
movq m3, [r3] ; dither
test r4d, r4d
movq m3, [ditherq] ; dither
test offsetd, offsetd
jz .no_rot
%if mmsize == 16
punpcklqdq m3, m3
......@@ -374,7 +374,7 @@ cglobal yuv2plane1_%1, %3, %3, %2
%if mmsize == 8
yuv2plane1_mainloop %1, a
%else ; mmsize == 16
test r1, 15
test dstq, 15
jnz .unaligned
yuv2plane1_mainloop %1, a
REP_RET
......
......@@ -762,10 +762,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
......@@ -993,10 +993,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
......@@ -1048,9 +1048,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
......@@ -1101,10 +1101,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
......@@ -1368,9 +1368,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
......@@ -1386,10 +1386,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \
"add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \
"sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $8, %%mm3 \n\t" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment