Commit 18d0a16f authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  swscale: make yuv2yuv1 use named registers.
  h264: mark h264_idct_add8_10 with number of XMM registers.
  swscale: fix V plane memory location in bilinear/unscaled RGB/YUYV case.
  vp8: always update next_framep[] before returning from decode_frame().
  avconv: estimate next_dts from framerate if it is set.
  avconv: better next_dts usage.
  avconv: rename InputStream.pts to last_dts.
  avconv: reduce overloading for InputStream.pts.
  avconv: rename InputStream.next_pts to next_dts.
  avconv: rework -t handling for encoding.
  avconv: set encoder timebase for subtitles.
  pva-demux test: add -vn
  swscale: K&R formatting cosmetics for SPARC code
  apedec: allow the user to set the maximum number of output samples per call
  apedec: do not unnecessarily zero output samples for mono frames
  apedec: allocate a single flat buffer for decoded samples
  apedec: use sizeof(field) instead of sizeof(type)
  swscale: split C output functions into separate file.
  swscale: Split C input functions into separate file.
  bytestream: Add bytestream2 writing API.

The avconv changes are not merged yet because they cause massive regressions and bugs.

Conflicts:
	ffmpeg.c
	libavcodec/vp8.c
	libswscale/swscale.c
	libswscale/x86/swscale_template.c
	tests/fate/demux.mak
	tests/ref/lavf/asf
	tests/ref/lavf/avi
	tests/ref/lavf/mkv
	tests/ref/lavf/mpg
	tests/ref/lavf/nut
	tests/ref/lavf/ogg
	tests/ref/lavf/rm
	tests/ref/lavf/ts
	tests/ref/seek/lavf_avi
	tests/ref/seek/lavf_mkv
	tests/ref/seek/lavf_rm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 950930b4 ef1c785f
...@@ -2616,6 +2616,7 @@ static int transcode_init(OutputFile *output_files, int nb_output_files, ...@@ -2616,6 +2616,7 @@ static int transcode_init(OutputFile *output_files, int nb_output_files,
#endif #endif
break; break;
case AVMEDIA_TYPE_SUBTITLE: case AVMEDIA_TYPE_SUBTITLE:
codec->time_base = (AVRational){1, 1000};
break; break;
default: default:
abort(); abort();
......
This diff is collapsed.
/* /*
* Bytestream functions * Bytestream functions
* copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr> * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
* Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
* *
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
...@@ -30,6 +31,11 @@ typedef struct { ...@@ -30,6 +31,11 @@ typedef struct {
const uint8_t *buffer, *buffer_end, *buffer_start; const uint8_t *buffer, *buffer_end, *buffer_start;
} GetByteContext; } GetByteContext;
/**
 * Write cursor over a caller-supplied byte buffer.
 */
typedef struct {
    uint8_t *buffer;       /* current write position */
    uint8_t *buffer_end;   /* one past the last writable byte */
    uint8_t *buffer_start; /* first byte of the underlying buffer */
    int eof;               /* non-zero once a write or skip did not fit */
} PutByteContext;
#define DEF_T(type, name, bytes, read, write) \ #define DEF_T(type, name, bytes, read, write) \
static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\ static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
(*b) += bytes;\ (*b) += bytes;\
...@@ -39,6 +45,17 @@ static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type valu ...@@ -39,6 +45,17 @@ static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type valu
write(*b, value);\ write(*b, value);\
(*b) += bytes;\ (*b) += bytes;\
}\ }\
static av_always_inline void bytestream2_put_ ## name ## u(PutByteContext *p, const type value)\
{\
bytestream_put_ ## name(&p->buffer, value);\
}\
static av_always_inline void bytestream2_put_ ## name(PutByteContext *p, const type value){\
if (!p->eof && (p->buffer_end - p->buffer >= bytes)) {\
write(p->buffer, value);\
p->buffer += bytes;\
} else\
p->eof = 1;\
}\
static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g)\ static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g)\
{\ {\
return bytestream_get_ ## name(&g->buffer);\ return bytestream_get_ ## name(&g->buffer);\
...@@ -119,22 +136,53 @@ static av_always_inline void bytestream2_init(GetByteContext *g, ...@@ -119,22 +136,53 @@ static av_always_inline void bytestream2_init(GetByteContext *g,
g->buffer_end = buf + buf_size; g->buffer_end = buf + buf_size;
} }
/**
 * Attach a PutByteContext to an output buffer.
 *
 * @param p        writer context to initialise
 * @param buf      first byte of the output buffer
 * @param buf_size number of bytes available starting at buf
 *
 * After the call the write position and the recorded start both equal buf,
 * the end pointer is buf + buf_size, and the eof flag is cleared.
 */
static av_always_inline void bytestream2_init_writer(PutByteContext *p,
                                                     uint8_t *buf, int buf_size)
{
    p->eof          = 0;
    p->buffer_start = buf;
    p->buffer       = buf;
    p->buffer_end   = buf + buf_size;
}
static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g) static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g)
{ {
return g->buffer_end - g->buffer; return g->buffer_end - g->buffer;
} }
/**
 * Report how much room is left in the writer.
 *
 * @param p writer context
 * @return distance in bytes from the current write position to the end
 *         of the buffer
 */
static av_always_inline unsigned int bytestream2_get_bytes_left_p(PutByteContext *p)
{
    const unsigned int remaining = p->buffer_end - p->buffer;

    return remaining;
}
static av_always_inline void bytestream2_skip(GetByteContext *g, static av_always_inline void bytestream2_skip(GetByteContext *g,
unsigned int size) unsigned int size)
{ {
g->buffer += FFMIN(g->buffer_end - g->buffer, size); g->buffer += FFMIN(g->buffer_end - g->buffer, size);
} }
/**
 * Advance the write position without storing anything.
 *
 * @param p    writer context
 * @param size number of bytes to skip
 *
 * Does nothing when the eof flag is already set. Otherwise the position
 * moves forward by at most the remaining space; if fewer than size bytes
 * were available the eof flag is raised.
 */
static av_always_inline void bytestream2_skip_p(PutByteContext *p,
                                                unsigned int size)
{
    if (!p->eof) {
        int step = FFMIN(p->buffer_end - p->buffer, size);

        if (step != size)
            p->eof = 1;
        p->buffer += step;
    }
}
static av_always_inline int bytestream2_tell(GetByteContext *g) static av_always_inline int bytestream2_tell(GetByteContext *g)
{ {
return (int)(g->buffer - g->buffer_start); return (int)(g->buffer - g->buffer_start);
} }
/**
 * Report the current write offset.
 *
 * @param p writer context
 * @return number of bytes between the start of the buffer and the current
 *         write position, converted to int
 */
static av_always_inline int bytestream2_tell_p(PutByteContext *p)
{
    const int position = (int)(p->buffer - p->buffer_start);

    return position;
}
static av_always_inline int bytestream2_seek(GetByteContext *g, int offset, static av_always_inline int bytestream2_seek(GetByteContext *g, int offset,
int whence) int whence)
{ {
...@@ -158,6 +206,36 @@ static av_always_inline int bytestream2_seek(GetByteContext *g, int offset, ...@@ -158,6 +206,36 @@ static av_always_inline int bytestream2_seek(GetByteContext *g, int offset,
return bytestream2_tell(g); return bytestream2_tell(g);
} }
/**
 * Reposition the write cursor.
 *
 * @param p      writer context
 * @param offset displacement, interpreted according to whence
 * @param whence SEEK_SET (from buffer start), SEEK_CUR (from the current
 *               position) or SEEK_END (from buffer end)
 * @return the new offset from the start of the buffer, or AVERROR(EINVAL)
 *         for an unrecognised whence
 *
 * The eof flag is cleared on entry, including for an unrecognised whence.
 * The target is clamped to the buffer; a request that lands beyond the end
 * raises the eof flag again. A request that lands before the start is
 * clamped silently.
 */
static av_always_inline int bytestream2_seek_p(PutByteContext *p, int offset,
                                               int whence)
{
    p->eof = 0;

    if (whence == SEEK_SET) {
        if (offset > p->buffer_end - p->buffer_start)
            p->eof = 1;
        p->buffer = p->buffer_start +
                    av_clip(offset, 0, p->buffer_end - p->buffer_start);
    } else if (whence == SEEK_CUR) {
        if (offset > p->buffer_end - p->buffer)
            p->eof = 1;
        /* av_clip() reads the old position before the += stores the new one */
        p->buffer += av_clip(offset, -(p->buffer - p->buffer_start),
                             p->buffer_end - p->buffer);
    } else if (whence == SEEK_END) {
        if (offset > 0)
            p->eof = 1;
        p->buffer = p->buffer_end +
                    av_clip(offset, -(p->buffer_end - p->buffer_start), 0);
    } else {
        return AVERROR(EINVAL);
    }

    return bytestream2_tell_p(p);
}
static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g, static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
uint8_t *dst, uint8_t *dst,
unsigned int size) unsigned int size)
...@@ -168,6 +246,40 @@ static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g, ...@@ -168,6 +246,40 @@ static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
return size2; return size2;
} }
/**
 * Copy a block of bytes into the writer.
 *
 * @param p    writer context
 * @param src  bytes to copy
 * @param size number of bytes requested
 * @return number of bytes actually copied; 0 when the eof flag was already
 *         set on entry
 *
 * The copy is truncated to the remaining space. When fewer than size bytes
 * fit, the eof flag is raised.
 */
static av_always_inline unsigned int bytestream2_put_buffer(PutByteContext *p,
                                                            const uint8_t *src,
                                                            unsigned int size)
{
    int written = 0;

    if (!p->eof) {
        written = FFMIN(p->buffer_end - p->buffer, size);
        if (written != size)
            p->eof = 1;
        memcpy(p->buffer, src, written);
        p->buffer += written;
    }
    return written;
}
/**
 * Fill part of the writer with a repeated byte value.
 *
 * @param p    writer context
 * @param c    byte value to store
 * @param size number of bytes requested
 *
 * Does nothing when the eof flag is already set. The fill is truncated to
 * the remaining space. When fewer than size bytes fit, the eof flag is
 * raised.
 */
static av_always_inline void bytestream2_set_buffer(PutByteContext *p,
                                                    const uint8_t c,
                                                    unsigned int size)
{
    if (!p->eof) {
        int filled = FFMIN(p->buffer_end - p->buffer, size);

        if (filled != size)
            p->eof = 1;
        memset(p->buffer, c, filled);
        p->buffer += filled;
    }
}
/**
 * Query the writer's eof flag.
 *
 * @param p writer context
 * @return the current value of the eof field
 */
static av_always_inline unsigned int bytestream2_get_eof(PutByteContext *p)
{
    const unsigned int overflowed = p->eof;

    return overflowed;
}
static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size) static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
{ {
memcpy(dst, *b, size); memcpy(dst, *b, size);
......
...@@ -1561,18 +1561,6 @@ static void release_queued_segmaps(VP8Context *s, int is_close) ...@@ -1561,18 +1561,6 @@ static void release_queued_segmaps(VP8Context *s, int is_close)
s->maps_are_invalid = 0; s->maps_are_invalid = 0;
} }
/**
 * Prepare the decoder state for skipping the current frame.
 * The frame is flagged invisible and its slot in next_framep[] is cleared;
 * when update_last is set, the PREVIOUS slot is cleared as well.
 */
static void skipframe_clear(VP8Context *s)
{
    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = NULL;
    s->next_framep[VP56_FRAME_CURRENT] = NULL;
    s->invisible = 1;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) AVPacket *avpkt)
{ {
...@@ -1584,7 +1572,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1584,7 +1572,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
release_queued_segmaps(s, 0); release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret; goto err;
prev_frame = s->framep[VP56_FRAME_CURRENT]; prev_frame = s->framep[VP56_FRAME_CURRENT];
...@@ -1594,6 +1582,11 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1594,6 +1582,11 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
skip_thresh = !referenced ? AVDISCARD_NONREF : skip_thresh = !referenced ? AVDISCARD_NONREF :
!s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
if (avctx->skip_frame >= skip_thresh) {
s->invisible = 1;
memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
goto skip_decode;
}
s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
// release no longer referenced frames // release no longer referenced frames
...@@ -1618,6 +1611,27 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1618,6 +1611,27 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
abort(); abort();
} }
if (curframe->data[0])
vp8_release_frame(s, curframe, 1, 0);
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decode on a keyframe. So just don't display anything rather than junk.
if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
!s->framep[VP56_FRAME_GOLDEN] ||
!s->framep[VP56_FRAME_GOLDEN2])) {
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
ret = AVERROR_INVALIDDATA;
goto err;
}
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
goto err;
}
// check if golden and altref are swapped // check if golden and altref are swapped
if (s->update_altref != VP56_FRAME_NONE) { if (s->update_altref != VP56_FRAME_NONE) {
...@@ -1637,36 +1651,6 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1637,36 +1651,6 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
} }
s->next_framep[VP56_FRAME_CURRENT] = curframe; s->next_framep[VP56_FRAME_CURRENT] = curframe;
if (avctx->skip_frame >= skip_thresh) {
skipframe_clear(s);
ret = avpkt->size;
goto skip_decode;
}
// Given that arithmetic probabilities are updated every frame, it's quite likely
// that the values we have on a random interframe are complete junk if we didn't
// start decode on a keyframe. So just don't display anything rather than junk.
if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
!s->framep[VP56_FRAME_GOLDEN] ||
!s->framep[VP56_FRAME_GOLDEN2])) {
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
skipframe_clear(s);
ret = AVERROR_INVALIDDATA;
goto skip_decode;
}
if (curframe->data[0])
vp8_release_frame(s, curframe, 1, 0);
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
skipframe_clear(s);
goto skip_decode;
}
ff_thread_finish_setup(avctx); ff_thread_finish_setup(avctx);
s->linesize = curframe->linesize[0]; s->linesize = curframe->linesize[0];
...@@ -1778,20 +1762,22 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1778,20 +1762,22 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
} }
ff_thread_report_progress(curframe, INT_MAX, 0); ff_thread_report_progress(curframe, INT_MAX, 0);
ret = avpkt->size; memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
skip_decode: skip_decode:
// if future frames don't use the updated probabilities, // if future frames don't use the updated probabilities,
// reset them to the values we saved // reset them to the values we saved
if (!s->update_probabilities) if (!s->update_probabilities)
s->prob[0] = s->prob[1]; s->prob[0] = s->prob[1];
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
if (!s->invisible) { if (!s->invisible) {
*(AVFrame*)data = *curframe; *(AVFrame*)data = *curframe;
*data_size = sizeof(AVFrame); *data_size = sizeof(AVFrame);
} }
return avpkt->size;
err:
memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
return ret; return ret;
} }
......
...@@ -315,7 +315,7 @@ IDCT_ADD16INTRA_10 avx ...@@ -315,7 +315,7 @@ IDCT_ADD16INTRA_10 avx
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro IDCT_ADD8 1 %macro IDCT_ADD8 1
cglobal h264_idct_add8_10_%1,5,7 cglobal h264_idct_add8_10_%1,5,7,7
%if ARCH_X86_64 %if ARCH_X86_64
mov r10, r0 mov r10, r0
%endif %endif
......
...@@ -5,8 +5,8 @@ FFLIBS = avutil ...@@ -5,8 +5,8 @@ FFLIBS = avutil
HEADERS = swscale.h HEADERS = swscale.h
OBJS = options.o rgb2rgb.o swscale.o utils.o yuv2rgb.o \ OBJS = input.o options.o output.o rgb2rgb.o swscale.o \
swscale_unscaled.o swscale_unscaled.o utils.o yuv2rgb.o
OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \ OBJS-$(ARCH_BFIN) += bfin/internal_bfin.o \
bfin/swscale_bfin.o \ bfin/swscale_bfin.o \
......
This diff is collapsed.
This diff is collapsed.
...@@ -28,15 +28,15 @@ ...@@ -28,15 +28,15 @@
#define YUV2RGB_INIT \ #define YUV2RGB_INIT \
"wr %%g0, 0x10, %%gsr \n\t" \ "wr %%g0, 0x10, %%gsr \n\t" \
"ldd [%5], %%f32 \n\t" \ "ldd [%5], %%f32 \n\t" \
"ldd [%5+8], %%f34 \n\t" \ "ldd [%5 + 8], %%f34 \n\t" \
"ldd [%5+16], %%f36 \n\t" \ "ldd [%5 + 16], %%f36 \n\t" \
"ldd [%5+24], %%f38 \n\t" \ "ldd [%5 + 24], %%f38 \n\t" \
"ldd [%5+32], %%f40 \n\t" \ "ldd [%5 + 32], %%f40 \n\t" \
"ldd [%5+40], %%f42 \n\t" \ "ldd [%5 + 40], %%f42 \n\t" \
"ldd [%5+48], %%f44 \n\t" \ "ldd [%5 + 48], %%f44 \n\t" \
"ldd [%5+56], %%f46 \n\t" \ "ldd [%5 + 56], %%f46 \n\t" \
"ldd [%5+64], %%f48 \n\t" \ "ldd [%5 + 64], %%f48 \n\t" \
"ldd [%5+72], %%f50 \n\t" "ldd [%5 + 72], %%f50 \n\t"
#define YUV2RGB_KERNEL \ #define YUV2RGB_KERNEL \
/* ^^^^ f0=Y f3=u f5=v */ \ /* ^^^^ f0=Y f3=u f5=v */ \
...@@ -78,23 +78,22 @@ ...@@ -78,23 +78,22 @@
"fpack16 %%f2, %%f2 \n\t" \ "fpack16 %%f2, %%f2 \n\t" \
"fpack16 %%f18, %%f18 \n\t" "fpack16 %%f18, %%f18 \n\t"
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_420P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[],
int srcSliceH, uint8_t* dst[], int dstStride[]) int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{ {
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for (y = 0; y < srcSliceH; ++y)
__asm__ volatile ( __asm__ volatile (
YUV2RGB_INIT YUV2RGB_INIT
"wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */
"1: \n\t" "1: \n\t"
"ldda [%1] %%asi, %%f2 \n\t" "ldda [%1] %%asi, %%f2 \n\t"
"ldda [%1+2] %%asi, %%f18 \n\t" "ldda [%1 + 2] %%asi, %%f18 \n\t"
"ldda [%2] %%asi, %%f4 \n\t" "ldda [%2] %%asi, %%f4 \n\t"
"ldda [%2+2] %%asi, %%f20 \n\t" "ldda [%2 + 2] %%asi, %%f20 \n\t"
"ld [%0], %%f0 \n\t" "ld [%0], %%f0 \n\t"
"ld [%0+4], %%f16 \n\t" "ld [%0+4], %%f16 \n\t"
"fpmerge %%f3, %%f3, %%f2 \n\t" "fpmerge %%f3, %%f3, %%f2 \n\t"
...@@ -103,53 +102,53 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -103,53 +102,53 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
"fpmerge %%f21, %%f21, %%f20 \n\t" "fpmerge %%f21, %%f21, %%f20 \n\t"
YUV2RGB_KERNEL YUV2RGB_KERNEL
"fzero %%f0 \n\t" "fzero %%f0 \n\t"
"fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 "fpmerge %%f4, %%f6, %%f8 \n\t" // r, b, t1
"fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 "fpmerge %%f20, %%f22, %%f24 \n\t" // r, b, t1
"fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 "fpmerge %%f0, %%f2, %%f10 \n\t" // 0, g, t2
"fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 "fpmerge %%f0, %%f18, %%f26 \n\t" // 0, g, t2
"fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb "fpmerge %%f10, %%f8, %%f4 \n\t" // t2, t1, msb
"fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb "fpmerge %%f26, %%f24, %%f20 \n\t" // t2, t1, msb
"fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb "fpmerge %%f11, %%f9, %%f6 \n\t" // t2, t1, lsb
"fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb "fpmerge %%f27, %%f25, %%f22 \n\t" // t2, t1, lsb
"std %%f4, [%3] \n\t" "std %%f4, [%3] \n\t"
"std %%f20, [%3+16] \n\t" "std %%f20, [%3 + 16] \n\t"
"std %%f6, [%3+8] \n\t" "std %%f6, [%3 + 8] \n\t"
"std %%f22, [%3+24] \n\t" "std %%f22, [%3 + 24] \n\t"
"add %0, 8, %0 \n\t" "add %0, 8, %0 \n\t"
"add %1, 4, %1 \n\t" "add %1, 4, %1 \n\t"
"add %2, 4, %2 \n\t" "add %2, 4, %2 \n\t"
"subcc %4, 8, %4 \n\t" "subcc %4, 8, %4 \n\t"
"bne 1b \n\t" "bne 1b \n\t"
"add %3, 32, %3 \n\t" //delay slot "add %3, 32, %3 \n\t" // delay slot
: "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
: "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+((y+srcSliceY)>>1)*srcStride[1]), : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + ((y + srcSliceY) >> 1) * srcStride[1]),
"2" (src[2]+((y+srcSliceY)>>1)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), "2" (src[2] + ((y + srcSliceY) >> 1) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]),
"4" (c->dstW), "4" (c->dstW),
"5" (c->sparc_coeffs) "5" (c->sparc_coeffs)
); );
}
return srcSliceH; return srcSliceH;
} }
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_422P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[],
int srcSliceH, uint8_t* dst[], int dstStride[]) int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{ {
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for (y = 0; y < srcSliceH; ++y)
__asm__ volatile ( __asm__ volatile (
YUV2RGB_INIT YUV2RGB_INIT
"wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */
"1: \n\t" "1: \n\t"
"ldda [%1] %%asi, %%f2 \n\t" "ldda [%1] %%asi, %%f2 \n\t"
"ldda [%1+2] %%asi, %%f18 \n\t" "ldda [%1 + 2] %%asi, %%f18 \n\t"
"ldda [%2] %%asi, %%f4 \n\t" "ldda [%2] %%asi, %%f4 \n\t"
"ldda [%2+2] %%asi, %%f20 \n\t" "ldda [%2 + 2] %%asi, %%f20 \n\t"
"ld [%0], %%f0 \n\t" "ld [%0], %%f0 \n\t"
"ld [%0+4], %%f16 \n\t" "ld [%0 + 4], %%f16 \n\t"
"fpmerge %%f3, %%f3, %%f2 \n\t" "fpmerge %%f3, %%f3, %%f2 \n\t"
"fpmerge %%f19, %%f19, %%f18 \n\t" "fpmerge %%f19, %%f19, %%f18 \n\t"
"fpmerge %%f5, %%f5, %%f4 \n\t" "fpmerge %%f5, %%f5, %%f4 \n\t"
...@@ -165,9 +164,9 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -165,9 +164,9 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
"fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb
"fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb
"std %%f4, [%3] \n\t" "std %%f4, [%3] \n\t"
"std %%f20, [%3+16] \n\t" "std %%f20, [%3 + 16] \n\t"
"std %%f6, [%3+8] \n\t" "std %%f6, [%3 + 8] \n\t"
"std %%f22, [%3+24] \n\t" "std %%f22, [%3 + 24] \n\t"
"add %0, 8, %0 \n\t" "add %0, 8, %0 \n\t"
"add %1, 4, %1 \n\t" "add %1, 4, %1 \n\t"
...@@ -176,36 +175,36 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -176,36 +175,36 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
"bne 1b \n\t" "bne 1b \n\t"
"add %3, 32, %3 \n\t" //delay slot "add %3, 32, %3 \n\t" //delay slot
: "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6)
: "0" (src[0]+(y+srcSliceY)*srcStride[0]), "1" (src[1]+(y+srcSliceY)*srcStride[1]), : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + (y + srcSliceY) * srcStride[1]),
"2" (src[2]+(y+srcSliceY)*srcStride[2]), "3" (dst[0]+(y+srcSliceY)*dstStride[0]), "2" (src[2] + (y + srcSliceY) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]),
"4" (c->dstW), "4" (c->dstW),
"5" (c->sparc_coeffs) "5" (c->sparc_coeffs)
); );
}
return srcSliceH; return srcSliceH;
} }
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c) SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
{ {
c->sparc_coeffs[5]=c->yCoeff; c->sparc_coeffs[5] = c->yCoeff;
c->sparc_coeffs[6]=c->vgCoeff; c->sparc_coeffs[6] = c->vgCoeff;
c->sparc_coeffs[7]=c->vrCoeff; c->sparc_coeffs[7] = c->vrCoeff;
c->sparc_coeffs[8]=c->ubCoeff; c->sparc_coeffs[8] = c->ubCoeff;
c->sparc_coeffs[9]=c->ugCoeff; c->sparc_coeffs[9] = c->ugCoeff;
c->sparc_coeffs[0]=(((int16_t)c->yOffset*(int16_t)c->yCoeff >>11) & 0xffff) * 0x0001000100010001ULL; c->sparc_coeffs[0] = (((int16_t)c->yOffset * (int16_t)c->yCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
c->sparc_coeffs[1]=(((int16_t)c->uOffset*(int16_t)c->ubCoeff>>11) & 0xffff) * 0x0001000100010001ULL; c->sparc_coeffs[1] = (((int16_t)c->uOffset * (int16_t)c->ubCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
c->sparc_coeffs[2]=(((int16_t)c->uOffset*(int16_t)c->ugCoeff>>11) & 0xffff) * 0x0001000100010001ULL; c->sparc_coeffs[2] = (((int16_t)c->uOffset * (int16_t)c->ugCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
c->sparc_coeffs[3]=(((int16_t)c->vOffset*(int16_t)c->vgCoeff>>11) & 0xffff) * 0x0001000100010001ULL; c->sparc_coeffs[3] = (((int16_t)c->vOffset * (int16_t)c->vgCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
c->sparc_coeffs[4]=(((int16_t)c->vOffset*(int16_t)c->vrCoeff>>11) & 0xffff) * 0x0001000100010001ULL; c->sparc_coeffs[4] = (((int16_t)c->vOffset * (int16_t)c->vrCoeff >> 11) & 0xffff) * 0x0001000100010001ULL;
if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7)==0) { if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV422P && (c->dstW & 7) == 0) {
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n"); av_log(c, AV_LOG_INFO,
"SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n");
return vis_422P_ARGB32; return vis_422P_ARGB32;
} } else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7) == 0) {
else if (c->dstFormat == PIX_FMT_RGB32 && c->srcFormat == PIX_FMT_YUV420P && (c->dstW & 7)==0) { av_log(c, AV_LOG_INFO,
av_log(c, AV_LOG_INFO, "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n"); "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n");
return vis_420P_ARGB32; return vis_420P_ARGB32;
} }
return NULL; return NULL;
......
This diff is collapsed.
...@@ -359,8 +359,8 @@ typedef struct SwsContext { ...@@ -359,8 +359,8 @@ typedef struct SwsContext {
#define V_TEMP "11*8+4*4*256*2+32" #define V_TEMP "11*8+4*4*256*2+32"
#define Y_TEMP "11*8+4*4*256*2+40" #define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48" #define UV_OFF_PX "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56" #define UV_OFF_BYTE "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64" #define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+80" #define DITHER32 "11*8+4*4*256*3+80"
...@@ -706,6 +706,14 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c); ...@@ -706,6 +706,14 @@ void ff_swscale_get_unscaled_altivec(SwsContext *c);
*/ */
SwsFunc ff_getSwsFunc(SwsContext *c); SwsFunc ff_getSwsFunc(SwsContext *c);
void ff_sws_init_input_funcs(SwsContext *c);
void ff_sws_init_output_funcs(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,
yuv2interleavedX_fn *yuv2nv12cX,
yuv2packed1_fn *yuv2packed1,
yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX);
void ff_sws_init_swScale_altivec(SwsContext *c); void ff_sws_init_swScale_altivec(SwsContext *c);
void ff_sws_init_swScale_mmx(SwsContext *c); void ff_sws_init_swScale_mmx(SwsContext *c);
......
...@@ -45,6 +45,102 @@ ...@@ -45,6 +45,102 @@
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
/*
 * Eight 8x8 tables of byte values, declared through DECLARE_ALIGNED with an
 * argument of 8 (presumably 8-byte alignment; confirm against the macro).
 * Value ranges per table, as listed below:
 *   table 0: 0..1      table 1: 0..3      table 2: 0..7      table 3: 0..15
 *   table 4: 0..31     tables 5 and 6: 0..63 (identical contents)
 *   table 7: even values 0..126
 * NOTE(review): these look like ordered-dither matrices; how the first index
 * is chosen is decided by code outside this chunk.
 */
DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
{
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
{ 0, 1, 0, 1, 0, 1, 0, 1,},
{ 1, 0, 1, 0, 1, 0, 1, 0,},
},{
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
{ 1, 2, 1, 2, 1, 2, 1, 2,},
{ 3, 0, 3, 0, 3, 0, 3, 0,},
},{
{ 2, 4, 3, 5, 2, 4, 3, 5,},
{ 6, 0, 7, 1, 6, 0, 7, 1,},
{ 3, 5, 2, 4, 3, 5, 2, 4,},
{ 7, 1, 6, 0, 7, 1, 6, 0,},
{ 2, 4, 3, 5, 2, 4, 3, 5,},
{ 6, 0, 7, 1, 6, 0, 7, 1,},
{ 3, 5, 2, 4, 3, 5, 2, 4,},
{ 7, 1, 6, 0, 7, 1, 6, 0,},
},{
{ 4, 8, 7, 11, 4, 8, 7, 11,},
{ 12, 0, 15, 3, 12, 0, 15, 3,},
{ 6, 10, 5, 9, 6, 10, 5, 9,},
{ 14, 2, 13, 1, 14, 2, 13, 1,},
{ 4, 8, 7, 11, 4, 8, 7, 11,},
{ 12, 0, 15, 3, 12, 0, 15, 3,},
{ 6, 10, 5, 9, 6, 10, 5, 9,},
{ 14, 2, 13, 1, 14, 2, 13, 1,},
},{
{ 9, 17, 15, 23, 8, 16, 14, 22,},
{ 25, 1, 31, 7, 24, 0, 30, 6,},
{ 13, 21, 11, 19, 12, 20, 10, 18,},
{ 29, 5, 27, 3, 28, 4, 26, 2,},
{ 8, 16, 14, 22, 9, 17, 15, 23,},
{ 24, 0, 30, 6, 25, 1, 31, 7,},
{ 12, 20, 10, 18, 13, 21, 11, 19,},
{ 28, 4, 26, 2, 29, 5, 27, 3,},
},{
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50, 2, 62, 14, 49, 1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54, 6, 57, 9, 53, 5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48, 0, 60, 12, 51, 3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56, 8, 52, 4, 59, 11, 55, 7,},
},{
{ 18, 34, 30, 46, 17, 33, 29, 45,},
{ 50, 2, 62, 14, 49, 1, 61, 13,},
{ 26, 42, 22, 38, 25, 41, 21, 37,},
{ 58, 10, 54, 6, 57, 9, 53, 5,},
{ 16, 32, 28, 44, 19, 35, 31, 47,},
{ 48, 0, 60, 12, 51, 3, 63, 15,},
{ 24, 40, 20, 36, 27, 43, 23, 39,},
{ 56, 8, 52, 4, 59, 11, 55, 7,},
},{
{ 36, 68, 60, 92, 34, 66, 58, 90,},
{ 100, 4,124, 28, 98, 2,122, 26,},
{ 52, 84, 44, 76, 50, 82, 42, 74,},
{ 116, 20,108, 12,114, 18,106, 10,},
{ 32, 64, 56, 88, 38, 70, 62, 94,},
{ 96, 0,120, 24,102, 6,126, 30,},
{ 48, 80, 40, 72, 54, 86, 46, 78,},
{ 112, 16,104, 8,118, 22,110, 14,},
}};
/* Eight-entry row in which every element is 64. */
static const uint8_t flat64[8] = { 64, 64, 64, 64, 64, 64, 64, 64 };
/*
 * Lookup table of 16-bit factors with 15 rows of 16 entries each.
 * NOTE(review): the meaning of the two indices is defined by the code that
 * reads this table, which is not visible in this chunk (presumably bit
 * depths; confirm against the users).
 */
const uint16_t dither_scale[15][16]={
{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
};
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val) uint8_t val)
{ {
......
...@@ -275,17 +275,17 @@ yuv2planeX_fn 10, 7, 5 ...@@ -275,17 +275,17 @@ yuv2planeX_fn 10, 7, 5
%macro yuv2plane1_mainloop 2 %macro yuv2plane1_mainloop 2
.loop_%2: .loop_%2:
%if %1 == 8 %if %1 == 8
paddsw m0, m2, [r0+r2*2+mmsize*0] paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
paddsw m1, m3, [r0+r2*2+mmsize*1] paddsw m1, m3, [srcq+dstwq*2+mmsize*1]
psraw m0, 7 psraw m0, 7
psraw m1, 7 psraw m1, 7
packuswb m0, m1 packuswb m0, m1
mov%2 [r1+r2], m0 mov%2 [r1+r2], m0
%elif %1 == 16 %elif %1 == 16
paddd m0, m4, [r0+r2*4+mmsize*0] paddd m0, m4, [srcq+dstwq*4+mmsize*0]
paddd m1, m4, [r0+r2*4+mmsize*1] paddd m1, m4, [srcq+dstwq*4+mmsize*1]
paddd m2, m4, [r0+r2*4+mmsize*2] paddd m2, m4, [srcq+dstwq*4+mmsize*2]
paddd m3, m4, [r0+r2*4+mmsize*3] paddd m3, m4, [srcq+dstwq*4+mmsize*3]
psrad m0, 3 psrad m0, 3
psrad m1, 3 psrad m1, 3
psrad m2, 3 psrad m2, 3
...@@ -299,46 +299,46 @@ yuv2planeX_fn 10, 7, 5 ...@@ -299,46 +299,46 @@ yuv2planeX_fn 10, 7, 5
paddw m0, m5 paddw m0, m5
paddw m2, m5 paddw m2, m5
%endif ; mmx/sse2/sse4/avx %endif ; mmx/sse2/sse4/avx
mov%2 [r1+r2*2], m0 mov%2 [dstq+dstwq*2+mmsize*0], m0
mov%2 [r1+r2*2+mmsize], m2 mov%2 [dstq+dstwq*2+mmsize*1], m2
%else %else ; %1 == 9/10
paddsw m0, m2, [r0+r2*2+mmsize*0] paddsw m0, m2, [srcq+dstwq*2+mmsize*0]
paddsw m1, m2, [r0+r2*2+mmsize*1] paddsw m1, m2, [srcq+dstwq*2+mmsize*1]
psraw m0, 15 - %1 psraw m0, 15 - %1
psraw m1, 15 - %1 psraw m1, 15 - %1
pmaxsw m0, m4 pmaxsw m0, m4
pmaxsw m1, m4 pmaxsw m1, m4
pminsw m0, m3 pminsw m0, m3
pminsw m1, m3 pminsw m1, m3
mov%2 [r1+r2*2], m0 mov%2 [dstq+dstwq*2+mmsize*0], m0
mov%2 [r1+r2*2+mmsize], m1 mov%2 [dstq+dstwq*2+mmsize*1], m1
%endif %endif
add r2, mmsize add dstwq, mmsize
jl .loop_%2 jl .loop_%2
%endmacro %endmacro
%macro yuv2plane1_fn 3 %macro yuv2plane1_fn 3
cglobal yuv2plane1_%1, %3, %3, %2 cglobal yuv2plane1_%1, %3, %3, %2, src, dst, dstw, dither, offset
add r2, mmsize - 1 add dstwq, mmsize - 1
and r2, ~(mmsize - 1) and dstwq, ~(mmsize - 1)
%if %1 == 8 %if %1 == 8
add r1, r2 add dstq, dstwq
%else ; %1 != 8 %else ; %1 != 8
lea r1, [r1+r2*2] lea dstq, [dstq+dstwq*2]
%endif ; %1 == 8 %endif ; %1 == 8
%if %1 == 16 %if %1 == 16
lea r0, [r0+r2*4] lea srcq, [srcq+dstwq*4]
%else ; %1 != 16 %else ; %1 != 16
lea r0, [r0+r2*2] lea srcq, [srcq+dstwq*2]
%endif ; %1 == 16 %endif ; %1 == 16
neg r2 neg dstwq
%if %1 == 8 %if %1 == 8
pxor m4, m4 ; zero pxor m4, m4 ; zero
; create registers holding dither ; create registers holding dither
movq m3, [r3] ; dither movq m3, [ditherq] ; dither
test r4d, r4d test offsetd, offsetd
jz .no_rot jz .no_rot
%if mmsize == 16 %if mmsize == 16
punpcklqdq m3, m3 punpcklqdq m3, m3
...@@ -374,7 +374,7 @@ cglobal yuv2plane1_%1, %3, %3, %2 ...@@ -374,7 +374,7 @@ cglobal yuv2plane1_%1, %3, %3, %2
%if mmsize == 8 %if mmsize == 8
yuv2plane1_mainloop %1, a yuv2plane1_mainloop %1, a
%else ; mmsize == 16 %else ; mmsize == 16
test r1, 15 test dstq, 15
jnz .unaligned jnz .unaligned
yuv2plane1_mainloop %1, a yuv2plane1_mainloop %1, a
REP_RET REP_RET
......
...@@ -762,10 +762,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -762,10 +762,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...@@ -993,10 +993,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], ...@@ -993,10 +993,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...@@ -1048,9 +1048,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], ...@@ -1048,9 +1048,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
...@@ -1101,10 +1101,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], ...@@ -1101,10 +1101,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
...@@ -1368,9 +1368,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ...@@ -1368,9 +1368,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \ "psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \ "psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
...@@ -1386,10 +1386,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ...@@ -1386,10 +1386,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"1: \n\t"\ "1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
"add "UV_OFFx2"("#c"), "#index" \n\t" \ "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"sub "UV_OFFx2"("#c"), "#index" \n\t" \ "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $8, %%mm3 \n\t" \ "psrlw $8, %%mm3 \n\t" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment