Commit 1d048f76 authored by Michael Niedermayer

Merge commit '9a738c27'

* commit '9a738c27':
  v210enc: Add SIMD optimised 8-bit and 10-bit encoders

Conflicts:
	libavcodec/v210enc.c
	libavcodec/v210enc.h
	libavcodec/x86/Makefile
	libavcodec/x86/v210enc.asm
	libavcodec/x86/v210enc_init.c
	tests/ref/vsynth/vsynth1-v210
	tests/ref/vsynth/vsynth2-v210

See: 36091742
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents e827f656 9a738c27
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#define WRITE_PIXELS(a, b, c) \ #define WRITE_PIXELS(a, b, c) \
do { \ do { \
val = CLIP(*a++); \ val = CLIP(*a++); \
val |= (CLIP(*b++) << 10) | \ val |= (CLIP(*b++) << 10) | \
(CLIP(*c++) << 20); \ (CLIP(*c++) << 20); \
AV_WL32(dst, val); \ AV_WL32(dst, val); \
...@@ -40,21 +40,22 @@ ...@@ -40,21 +40,22 @@
#define WRITE_PIXELS8(a, b, c) \ #define WRITE_PIXELS8(a, b, c) \
do { \ do { \
val = (CLIP8(*a++) << 2); \ val = (CLIP8(*a++) << 2); \
val |= (CLIP8(*b++) << 12) | \ val |= (CLIP8(*b++) << 12) | \
(CLIP8(*c++) << 22); \ (CLIP8(*c++) << 22); \
AV_WL32(dst, val); \ AV_WL32(dst, val); \
dst += 4; \ dst += 4; \
} while (0) } while (0)
static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u, static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
const uint8_t *v, uint8_t *dst, ptrdiff_t width) const uint8_t *v, uint8_t *dst,
ptrdiff_t width)
{ {
uint32_t val; uint32_t val;
int i; int i;
/* unroll this to match the assembly */ /* unroll this to match the assembly */
for( i = 0; i < width-11; i += 12 ){ for (i = 0; i < width - 11; i += 12) {
WRITE_PIXELS8(u, y, v); WRITE_PIXELS8(u, y, v);
WRITE_PIXELS8(y, u, y); WRITE_PIXELS8(y, u, y);
WRITE_PIXELS8(v, y, u); WRITE_PIXELS8(v, y, u);
...@@ -67,12 +68,13 @@ static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u, ...@@ -67,12 +68,13 @@ static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
} }
static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u, static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u,
const uint16_t *v, uint8_t *dst, ptrdiff_t width) const uint16_t *v, uint8_t *dst,
ptrdiff_t width)
{ {
uint32_t val; uint32_t val;
int i; int i;
for( i = 0; i < width-5; i += 6 ){ for (i = 0; i < width - 5; i += 6) {
WRITE_PIXELS(u, y, v); WRITE_PIXELS(u, y, v);
WRITE_PIXELS(y, u, y); WRITE_PIXELS(y, u, y);
WRITE_PIXELS(v, y, u); WRITE_PIXELS(v, y, u);
...@@ -95,8 +97,8 @@ static av_cold int encode_init(AVCodecContext *avctx) ...@@ -95,8 +97,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
s->pack_line_8 = v210_planar_pack_8_c; s->pack_line_8 = v210_planar_pack_8_c;
s->pack_line_10 = v210_planar_pack_10_c; s->pack_line_10 = v210_planar_pack_10_c;
if (ARCH_X86) if (ARCH_X86)
ff_v210enc_init_x86(s); ff_v210enc_init_x86(s);
...@@ -108,24 +110,23 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -108,24 +110,23 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
const AVFrame *pic, int *got_packet) const AVFrame *pic, int *got_packet)
{ {
V210EncContext *s = avctx->priv_data; V210EncContext *s = avctx->priv_data;
int aligned_width = ((avctx->width + 47) / 48) * 48; int aligned_width = ((avctx->width + 47) / 48) * 48;
int stride = aligned_width * 8 / 3; int stride = aligned_width * 8 / 3;
int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4; int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
int h, w, ret; int h, w, ret;
uint8_t *dst; uint8_t *dst;
if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) { ret = ff_alloc_packet(pkt, avctx->height * stride);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n"); av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
return ret; return ret;
} }
dst = pkt->data; dst = pkt->data;
if (pic->format == AV_PIX_FMT_YUV422P10) { if (pic->format == AV_PIX_FMT_YUV422P10) {
const uint16_t *y = (const uint16_t*)pic->data[0]; const uint16_t *y = (const uint16_t *)pic->data[0];
const uint16_t *u = (const uint16_t*)pic->data[1]; const uint16_t *u = (const uint16_t *)pic->data[1];
const uint16_t *v = (const uint16_t*)pic->data[2]; const uint16_t *v = (const uint16_t *)pic->data[2];
for (h = 0; h < avctx->height; h++) { for (h = 0; h < avctx->height; h++) {
uint32_t val; uint32_t val;
w = (avctx->width / 6) * 6; w = (avctx->width / 6) * 6;
...@@ -156,13 +157,11 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -156,13 +157,11 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
memset(dst, 0, line_padding); memset(dst, 0, line_padding);
dst += line_padding; dst += line_padding;
y += pic->linesize[0] / 2 - avctx->width; y += pic->linesize[0] / 2 - avctx->width;
u += pic->linesize[1] / 2 - avctx->width / 2; u += pic->linesize[1] / 2 - avctx->width / 2;
v += pic->linesize[2] / 2 - avctx->width / 2; v += pic->linesize[2] / 2 - avctx->width / 2;
} }
} } else if(pic->format == AV_PIX_FMT_YUV422P) {
else if(pic->format == AV_PIX_FMT_YUV422P) {
const uint8_t *y = pic->data[0]; const uint8_t *y = pic->data[0];
const uint8_t *u = pic->data[1]; const uint8_t *u = pic->data[1];
const uint8_t *v = pic->data[2]; const uint8_t *v = pic->data[2];
...@@ -176,7 +175,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -176,7 +175,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
v += w >> 1; v += w >> 1;
dst += (w / 12) * 32; dst += (w / 12) * 32;
for( ; w < avctx->width-5; w += 6 ){ for (; w < avctx->width - 5; w += 6) {
WRITE_PIXELS8(u, y, v); WRITE_PIXELS8(u, y, v);
WRITE_PIXELS8(y, u, y); WRITE_PIXELS8(y, u, y);
WRITE_PIXELS8(v, y, u); WRITE_PIXELS8(v, y, u);
...@@ -200,7 +199,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, ...@@ -200,7 +199,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
AV_WL32(dst, val); AV_WL32(dst, val);
dst += 4; dst += 4;
} }
memset(dst, 0, line_padding); memset(dst, 0, line_padding);
dst += line_padding; dst += line_padding;
......
...@@ -24,8 +24,10 @@ ...@@ -24,8 +24,10 @@
#include "libavutil/pixfmt.h" #include "libavutil/pixfmt.h"
typedef struct { typedef struct {
void (*pack_line_8)(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width); void (*pack_line_8)(const uint8_t *y, const uint8_t *u,
void (*pack_line_10)(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width); const uint8_t *v, uint8_t *dst, ptrdiff_t width);
void (*pack_line_10)(const uint16_t *y, const uint16_t *u,
const uint16_t *v, uint8_t *dst, ptrdiff_t width);
} V210EncContext; } V210EncContext;
void ff_v210enc_init_x86(V210EncContext *s); void ff_v210enc_init_x86(V210EncContext *s);
......
...@@ -19,19 +19,24 @@ ...@@ -19,19 +19,24 @@
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavcodec/v210enc.h" #include "libavcodec/v210enc.h"
void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width); void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u,
void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width); const uint8_t *v, uint8_t *dst,
void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width); ptrdiff_t width);
void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u,
const uint8_t *v, uint8_t *dst, ptrdiff_t width);
void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u,
const uint16_t *v, uint8_t *dst,
ptrdiff_t width);
av_cold void ff_v210enc_init_x86(V210EncContext *s) av_cold void ff_v210enc_init_x86(V210EncContext *s)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if( EXTERNAL_SSSE3(cpu_flags) ) { if (EXTERNAL_SSSE3(cpu_flags)) {
s->pack_line_8 = ff_v210_planar_pack_8_ssse3; s->pack_line_8 = ff_v210_planar_pack_8_ssse3;
s->pack_line_10 = ff_v210_planar_pack_10_ssse3; s->pack_line_10 = ff_v210_planar_pack_10_ssse3;
} }
if( EXTERNAL_AVX(cpu_flags) ) if (EXTERNAL_AVX(cpu_flags))
s->pack_line_8 = ff_v210_planar_pack_8_avx; s->pack_line_8 = ff_v210_planar_pack_8_avx;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment