Commit 8f9c38b1 authored by Martin Vignali

avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction

asm code by Henrik Gramner
parent 3a230ce5
...@@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1, ...@@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
*left_top = lt; *left_top = lt;
} }
/**
 * Left prediction of a plane (plain C version).
 *
 * Each output byte is the current source byte minus the previously visited
 * source byte, modulo 256. The predictor starts at 0x80 and is carried over
 * from the end of one row to the start of the next.
 *
 * @param dst    output buffer; residuals are written back to back
 *               (width bytes per row, no padding between rows)
 * @param src    first row of the source plane
 * @param stride distance in bytes between two consecutive source rows
 * @param width  number of bytes processed per row
 * @param height number of rows
 */
static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
                               ptrdiff_t stride, ptrdiff_t width, int height)
{
    uint8_t *out        = dst;
    const uint8_t *line = src;
    uint8_t last        = 0x80; /* predictor for the very first sample */
    ptrdiff_t x;
    int y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const uint8_t cur = line[x];

            *out++ = cur - last;
            last   = cur;
        }
        line += stride;
    }
}
av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c) av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
{ {
c->diff_bytes = diff_bytes_c; c->diff_bytes = diff_bytes_c;
c->sub_median_pred = sub_median_pred_c; c->sub_median_pred = sub_median_pred_c;
c->sub_left_predict = sub_left_predict_c;
if (ARCH_X86) if (ARCH_X86)
ff_llvidencdsp_init_x86(c); ff_llvidencdsp_init_x86(c);
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <stdint.h> #include <stdint.h>
#include "avcodec.h"
typedef struct LLVidEncDSPContext { typedef struct LLVidEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */, void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */, const uint8_t *src1 /* align 16 */,
...@@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext { ...@@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1, void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w, const uint8_t *src2, intptr_t w,
int *left, int *left_top); int *left, int *left_top);
void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);
} LLVidEncDSPContext; } LLVidEncDSPContext;
void ff_llvidencdsp_init(LLVidEncDSPContext *c); void ff_llvidencdsp_init(LLVidEncDSPContext *c);
......
...@@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride, ...@@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
} }
} }
/*
 * Write a plane using left prediction: every output byte is the source byte
 * minus the source byte visited just before it (modulo 256). The predictor
 * is seeded with 0x80 and is not reset at row boundaries. Output rows are
 * packed back to back; source rows are stride bytes apart.
 */
static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                         int width, int height)
{
    uint8_t left = 0x80; /* seed predictor */
    int rows_left;

    for (rows_left = height; rows_left > 0; rows_left--) {
        int col = 0;

        while (col < width) {
            uint8_t sample = src[col++];

            *dst++ = sample - left;
            left   = sample;
        }
        src += stride;
    }
}
#undef A #undef A
#undef B #undef B
...@@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src, ...@@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
for (i = 0; i < c->slices; i++) { for (i = 0; i < c->slices; i++) {
sstart = send; sstart = send;
send = height * (i + 1) / c->slices & cmask; send = height * (i + 1) / c->slices & cmask;
left_predict(src + sstart * stride, dst + sstart * width, c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
stride, width, send - sstart);
} }
break; break;
case PRED_MEDIAN: case PRED_MEDIAN:
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
%include "libavutil/x86/x86util.asm" %include "libavutil/x86/x86util.asm"
cextern pb_80
SECTION .text SECTION .text
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
...@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE ...@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u DIFF_BYTES_BODY u, u
%undef i %undef i
%endif %endif
;--------------------------------------------------------------------------------------------------
;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
;
; AVX (128-bit) left prediction: dst[n] = src[n] - (previously visited source byte).
; The predictor is carried from the last byte of one row into the first byte of
; the next row. Source rows are stride bytes apart; dst is advanced by width per
; row, i.e. output rows are packed.
;
; Register roles:
;   m0 / m1 : first / second 16 source bytes of the current 32-byte step.
;             On entry to a step, byte 15 of m1 is the predictor for byte 0.
;   m2 / m3 : the source bytes shifted by one position (the "previous" bytes),
;             then the residuals.
;   m4      : pshufb control used at the end of each row to move the row's last
;             byte into byte 15 of m1.
;   x       : scratch; ends up holding -width for the rest of the function.
;
; NOTE(review): the inner loop always loads and stores 32 bytes per step and runs
; at least once, so when width is not a multiple of 32 it reads and writes up to
; 31 bytes past the end of the row. Callers presumably guarantee the required
; padding and width > 0 / height > 0 -- confirm against the call sites.
;--------------------------------------------------------------------------------------------------
INIT_XMM avx
; 5 arguments, 6 general purpose registers, 5 vector registers
cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
; pb_80 is an external constant (presumably 16 bytes of 0x80, matching the
; 0x80 initial predictor used by the C version)
mova m1, [pb_80] ; prev initial
; point dst/src at the end of the first row so the row can be walked with a
; negative offset that counts up towards zero
add dstq, widthq
add srcq, widthq
; x = index (0..15) of the row's last byte inside its 16-byte vector
lea xd, [widthq-1]
neg widthq
and xd, 15
; m4 = copy of m1 (the pb_80 constant) with byte 15 replaced by that index.
; Used as a pshufb mask below: byte 15 selects the row's last pixel; the other
; lanes keep the pb_80 value (presumably 0x80, whose set top bit makes pshufb
; write zero).
pinsrb m4, m1, xd, 15
; from here on x = -width (row start offset relative to the row end)
mov xq, widthq
.loop:
movu m0, [srcq + widthq]
; m2 = { m1[15], m0[0..14] } : previous byte for each lane of m0
palignr m2, m0, m1, 15
movu m1, [srcq + widthq + 16]
; m3 = { m0[15], m1[0..14] } : previous byte for each lane of m1
palignr m3, m1, m0, 15
; residual = current - previous (byte-wise, wrapping)
psubb m2, m0, m2
psubb m3, m1, m3
movu [dstq + widthq], m2
movu [dstq + widthq + 16], m3
add widthq, 2 * 16
; continue while the offset is still negative, i.e. still inside the row
jl .loop
; end of row: step src to the next row and dst past the width bytes just written
add srcq, strideq
sub dstq, xq ; dst + width
; Bit 4 of -width tells which half of the final 32-byte step held the row's
; last byte: set -> it was in the first half (m0), clear -> second half (m1).
test xd, 16
jz .mod32
mova m1, m0
.mod32:
; move the row's last byte into byte 15 of m1 so it becomes the predictor for
; the first byte of the next row
pshufb m1, m4
; rewind the column offset to the start of the row
mov widthq, xq
dec heightd
; loop while rows remain (reuses .loop, which is also the row entry point)
jg .loop
RET
...@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ...@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w); intptr_t w);
void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
...@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) ...@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
c->diff_bytes = ff_diff_bytes_sse2; c->diff_bytes = ff_diff_bytes_sse2;
} }
if (EXTERNAL_AVX(cpu_flags)) {
c->sub_left_predict = ff_sub_left_predict_avx;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) { if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_avx2; c->diff_bytes = ff_diff_bytes_avx2;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment