Commit 30c1f272 authored by James Almer

huffyuvencdsp: move functions only used by huffyuv from lossless_videodsp

Signed-off-by: James Almer <jamrial@gmail.com>
parent 5ac1dd8e
......@@ -2430,7 +2430,7 @@ hap_encoder_deps="libsnappy"
hap_encoder_select="texturedspenc"
hevc_decoder_select="bswapdsp cabac golomb videodsp"
huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llviddsp"
huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp"
iac_decoder_select="imc_decoder"
imc_decoder_select="bswapdsp fft mdct sinewin"
indeo3_decoder_select="hpeldsp"
......
......@@ -76,7 +76,6 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
s->flags = avctx->flags;
ff_bswapdsp_init(&s->bdsp);
ff_llviddsp_init(&s->llviddsp, avctx);
s->width = avctx->width;
s->height = avctx->height;
......
......@@ -298,6 +298,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
return ret;
ff_huffyuvdsp_init(&s->hdsp);
ff_llviddsp_init(&s->llviddsp, avctx);
memset(s->vlc, 0, 4 * sizeof(VLC));
s->interlaced = avctx->height > 288;
......
......@@ -43,7 +43,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
if (s->bps <= 8) {
s->hencdsp.diff_bytes(dst, src0, src1, w);
} else {
s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
}
}
......@@ -84,7 +84,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
dst16[i] = temp - left;
left = temp;
}
s->llviddsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
s->hencdsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
return src16[w-1];
}
}
......@@ -158,7 +158,7 @@ static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
if (s->bps <= 8) {
s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top);
} else {
s->llviddsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
}
}
......@@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
ff_huffyuv_common_init(avctx);
ff_huffyuvencdsp_init(&s->hencdsp);
ff_huffyuvencdsp_init(&s->hencdsp, avctx);
avctx->extradata = av_mallocz(3*MAX_N + 4);
if (s->flags&AV_CODEC_FLAG_PASS1) {
......
......@@ -53,6 +53,32 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
dst[i + 0] = src1[i + 0] - src2[i + 0];
}
/**
 * dst[i] = (src1[i] - src2[i]) & mask for w 16-bit samples.
 * Callers pass mask = s->n - 1, i.e. (1 << bps) - 1, so mask selects the
 * valid sample bits.  The main loop subtracts sizeof(long)/2 lanes at a
 * time with a SWAR (SIMD-within-a-register) trick.
 */
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
long i;
#if !HAVE_FAST_UNALIGNED
/* On targets without fast unaligned loads, use a scalar loop (unrolled
 * by 4) when src2 is not long-aligned; the tail loop below finishes the
 * remaining w % 4 elements. */
if((long)src2 & (sizeof(long)-1)){
for(i=0; i+3<w; i+=4){
dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
}
}else
#endif
{
/* Per-16-bit-lane constants replicated across the machine word
 * (for mask = (1<<bps)-1): pw_lsb has the low bps-1 bits of every
 * lane set, pw_msb additionally each lane's top valid bit. */
unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
/* NOTE(review): type-punned long-sized loads/stores; presumably
 * relies on the build disabling strict aliasing -- confirm. */
long a = *(long*)(src1+i);
long b = *(long*)(src2+i);
/* Per-lane (a - b) & mask: pre-setting each lane's top valid bit
 * in a stops borrows from crossing lane boundaries; the final xor
 * restores the correct top bit of every lane. */
*(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
}
}
/* Scalar tail: elements left over by either path above. */
for (; i<w; i++)
dst[i] = (src1[i] - src2[i]) & mask;
}
static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top)
......@@ -74,11 +100,31 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
*left_top = lt;
}
av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
/**
 * HuffYUV median-prediction subtraction for >8-bit samples:
 * dst[i] = (src2[i] - mid_pred(l, src1[i], (l + src1[i] - lt) & mask)) & mask
 * where l/lt are the previous src2/src1 samples, seeded from *left and
 * *left_top and written back on return so state carries across calls.
 */
static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
    int i;
    uint16_t prev     = *left;
    uint16_t prev_top = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(prev, src1[i],
                                  (prev + src1[i] - prev_top) & mask);

        /* current samples become next iteration's left / left-top */
        prev_top = src1[i];
        prev     = src2[i];
        dst[i]   = (prev - pred) & mask;
    }

    *left     = prev;
    *left_top = prev_top;
}
/**
 * Set up the HuffYUV encoder DSP function pointers.
 * Installs the C implementations first, then lets the x86 init override
 * them with SIMD versions where the CPU flags (and avctx->pix_fmt depth)
 * permit.
 *
 * Fix: the block contained both the stale one-argument
 * ff_huffyuvencdsp_init_x86(c) call (which no longer matches the new
 * two-argument prototype) and the new call, with only the first one
 * covered by the ARCH_X86 guard; keep a single guarded call.
 */
av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
{
    c->diff_bytes                 = diff_bytes_c;
    c->diff_int16                 = diff_int16_c;
    c->sub_hfyu_median_pred       = sub_hfyu_median_pred_c;
    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;

    if (ARCH_X86)
        ff_huffyuvencdsp_init_x86(c, avctx);
}
......@@ -21,11 +21,18 @@
#include <stdint.h>
#include "avcodec.h"
typedef struct HuffYUVEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */,
const uint8_t *src2 /* align 1 */,
intptr_t w);
void (*diff_int16)(uint16_t *dst /* align 16 */,
const uint16_t *src1 /* align 16 */,
const uint16_t *src2 /* align 1 */,
unsigned mask, int w);
/**
* Subtract HuffYUV's variant of median prediction.
* Note, this might read from src1[-1], src2[-1].
......@@ -33,9 +40,12 @@ typedef struct HuffYUVEncDSPContext {
void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top);
void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1,
const uint16_t *src2, unsigned mask,
int w, int *left, int *left_top);
} HuffYUVEncDSPContext;
void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
#endif /* AVCODEC_HUFFYUVENCDSP_H */
......@@ -92,32 +92,6 @@ static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w
dst[i] = (dst[i] + src[i]) & mask;
}
/**
 * dst[i] = (src1[i] - src2[i]) & mask for w 16-bit samples
 * (mask = (1 << bps) - 1 selects the valid sample bits).
 * Main loop handles sizeof(long)/2 lanes per iteration with a SWAR trick.
 */
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
long i;
#if !HAVE_FAST_UNALIGNED
/* Scalar fallback (unrolled by 4) when src2 is not long-aligned on
 * targets without fast unaligned loads; tail loop finishes w % 4. */
if((long)src2 & (sizeof(long)-1)){
for(i=0; i+3<w; i+=4){
dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
}
}else
#endif
{
/* Replicate per-16-bit-lane constants: pw_lsb = low bps-1 bits of each
 * lane, pw_msb = pw_lsb plus each lane's top valid bit. */
unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
/* NOTE(review): type-punned loads; presumably relies on the build
 * disabling strict aliasing -- confirm. */
long a = *(long*)(src1+i);
long b = *(long*)(src2+i);
/* Per-lane (a - b) & mask without borrows crossing lanes. */
*(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
}
}
/* Scalar tail for leftover elements. */
for (; i<w; i++)
dst[i] = (src1[i] - src2[i]) & mask;
}
static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){
int i;
uint16_t l, lt;
......@@ -135,24 +109,6 @@ static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, con
*left_top = lt;
}
/**
 * HuffYUV median-prediction subtraction for >8-bit samples:
 * dst[i] = (src2[i] - mid_pred(l, src1[i], (l + src1[i] - lt) & mask)) & mask
 * where l/lt are the previous src2/src1 samples, seeded from *left and
 * *left_top and written back on return so state carries across calls.
 */
static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
int i;
uint16_t l, lt;
l = *left;
lt = *left_top;
for(i=0; i<w; i++){
const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask);
lt = src1[i]; /* current src1 becomes next iteration's left-top */
l = src2[i]; /* current src2 becomes next iteration's left */
dst[i] = (l - pred) & mask;
}
*left = l;
*left_top = lt;
}
static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){
int i;
......@@ -180,10 +136,8 @@ void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx)
c->add_left_pred = add_left_pred_c;
c->add_int16 = add_int16_c;
c->diff_int16= diff_int16_c;
c->add_hfyu_left_pred_int16 = add_hfyu_left_pred_int16_c;
c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c;
c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;
if (ARCH_X86)
ff_llviddsp_init_x86(c, avctx);
......
......@@ -35,9 +35,7 @@ typedef struct LLVidDSPContext {
intptr_t w, int left);
void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
int (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left);
} LLVidDSPContext;
......
......@@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
FF_ENABLE_DEPRECATION_WARNINGS
#endif
ff_huffyuvencdsp_init(&s->hdsp);
ff_huffyuvencdsp_init(&s->hdsp, avctx);
#if FF_API_PRIVATE_OPT
FF_DISABLE_DEPRECATION_WARNINGS
......
......@@ -120,7 +120,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
}
ff_bswapdsp_init(&c->bdsp);
ff_huffyuvencdsp_init(&c->hdsp);
ff_huffyuvencdsp_init(&c->hdsp, avctx);
#if FF_API_PRIVATE_OPT
FF_DISABLE_DEPRECATION_WARNINGS
......
......@@ -148,3 +148,116 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u
%undef i
%endif
; Shared loop body for the int16 add/diff routines.
;   %1 = a/u   -> aligned / unaligned vector moves
;   %2 = add/sub -> paddw/psubw, and selects the one-source (dst += src)
;                   or two-source (dst = src1 - src2) register layout
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd ; broadcast the 16-bit sample mask into every word of m4
SPLATW m4, m4
add wd, wd ; element count -> byte count (2 bytes per uint16_t)
test wq, 2*mmsize - 1
jz %%.tomainloop ; byte count already a multiple of the 2*mmsize chunk
push tmpq
%%.wordloop:
; scalar head: peel single words off the END of the buffer until the
; remaining byte count is a multiple of 2*mmsize
sub wq, 2
%ifidn %2, add
mov tmpw, [srcq+wq]
add tmpw, [dstq+wq]
%else
mov tmpw, [src1q+wq]
sub tmpw, [src2q+wq]
%endif
and tmpw, maskw
mov [dstq+wq], tmpw
test wq, 2*mmsize - 1
jnz %%.wordloop
pop tmpq
%%.tomainloop:
; advance the pointers past the region, then index with a negative
; offset that counts up towards zero
%ifidn %2, add
add srcq, wq
%else
add src1q, wq
add src2q, wq
%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
; main loop: 2*mmsize bytes (two vectors) per iteration
%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
%else
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
%endif
p%2w m0, m1 ; paddw / psubw on each 16-bit lane
p%2w m2, m3
pand m0, m4 ; keep only the valid sample bits
pand m2, m4
mov%1 [dstq+wq] , m0
mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
RET
%endmacro
%if ARCH_X86_32
INIT_MMX mmx
; MMX diff_int16, built on x86-32 only (x86-64 always has SSE2)
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
INT16_LOOP a, sub
%endif
INIT_XMM sse2
; SSE2 diff_int16: use the aligned loop only when dst, src1 and src2 are
; all mmsize-aligned; the aligned INT16_LOOP RETs inside the macro, so it
; does not fall through into the unaligned variant.
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
test src1q, mmsize-1
jnz .unaligned
test src2q, mmsize-1
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
INT16_LOOP a, sub
.unaligned:
INT16_LOOP u, sub
INIT_MMX mmxext
; void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1,
;                                           const uint16_t *src2, unsigned mask,
;                                           int w, int *left, int *left_top)
; dst[i] = (src2[i] - median(l, src1[i], (l + src1[i] - lt) & mask)) & mask
; with l = src2[i-1] and lt = src1[i-1], seeded from *left / *left_top.
; Uses signed pmaxsw/pminsw, so samples must fit in a signed int16 (the
; init code only installs this for pixel depth < 16).
; NOTE(review): processes 4 pixels per 8-byte iteration with no scalar
; tail -- appears to assume w is a positive multiple of 4; confirm with
; the callers.
cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
add wd, wd ; element count -> byte count
movd mm7, maskd
SPLATW mm7, mm7 ; mm7 = mask in every 16-bit lane
; build the lagged-by-one-pixel vectors: shift the first 4 words up one
; lane and inject the carried-in left_top / left values into lane 0
movq mm0, [src1q]
movq mm2, [src2q]
psllq mm0, 16
psllq mm2, 16
movd mm6, [left_topq]
por mm0, mm6 ; mm0 = src1[i-1 .. i+2] (lt lanes)
movd mm6, [leftq]
por mm2, mm6 ; mm2 = src2[i-1 .. i+2] (l lanes)
xor maskq, maskq ; mask register is dead now; reuse it as the byte index
.loop:
movq mm1, [src1q + maskq] ; current src1 pixels
movq mm3, [src2q + maskq] ; current src2 pixels
movq mm4, mm2 ; keep l for the median computation below
psubw mm2, mm0
paddw mm2, mm1
pand mm2, mm7 ; mm2 = gradient predictor (l + src1 - lt) & mask
movq mm5, mm4
pmaxsw mm4, mm1
pminsw mm1, mm5
pminsw mm4, mm2
pmaxsw mm4, mm1 ; mm4 = median(l, src1, gradient)
psubw mm3, mm4
pand mm3, mm7 ; residual = (src2 - pred) & mask
movq [dstq + maskq], mm3
add maskq, 8
; reload the lagged windows (one word behind the new index)
movq mm0, [src1q + maskq - 2]
movq mm2, [src2q + maskq - 2]
cmp maskq, wq
jb .loop
; write the last pixels back as the carry-out left_top / left state
movzx maskd, word [src1q + wq - 2]
mov [left_topq], maskd
movzx maskd, word [src2q + wq - 2]
mov [leftq], maskd
RET
......@@ -24,6 +24,7 @@
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/huffyuvencdsp.h"
......@@ -35,6 +36,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
unsigned mask, int w);
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
unsigned mask, int w);
void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
unsigned mask, int w, int *left, int *left_top);
#if HAVE_INLINE_ASM
......@@ -80,12 +87,14 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
#endif /* HAVE_INLINE_ASM */
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
{
av_unused int cpu_flags = av_get_cpu_flags();
const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_mmx;
c->diff_int16 = ff_diff_int16_mmx;
}
#if HAVE_INLINE_ASM
......@@ -94,8 +103,13 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
}
#endif /* HAVE_INLINE_ASM */
if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_sse2;
c->diff_int16 = ff_diff_int16_sse2;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
......
......@@ -288,25 +288,6 @@ cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
.unaligned:
INT16_LOOP u, add
%if ARCH_X86_32
INIT_MMX mmx
; MMX diff_int16, built on x86-32 only (x86-64 always has SSE2)
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
INT16_LOOP a, sub
%endif
INIT_XMM sse2
; SSE2 diff_int16: aligned loop only when dst, src1 and src2 are all
; mmsize-aligned; the aligned INT16_LOOP RETs inside the macro, so it
; does not fall through into the unaligned variant.
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
test src1q, mmsize-1
jnz .unaligned
test src2q, mmsize-1
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
INT16_LOOP a, sub
.unaligned:
INT16_LOOP u, sub
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
add wd, wd
add srcq, wq
......@@ -443,42 +424,3 @@ cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_t
movzx r2d, word [topq-2]
mov [left_topq], r2d
RET
; dst[i] = (src2[i] - median(l, src1[i], (l + src1[i] - lt) & mask)) & mask
; with l = src2[i-1] and lt = src1[i-1], seeded from *left / *left_top.
; Uses signed pmaxsw/pminsw, so samples must fit in a signed int16 (the
; init code only installs this for pixel depth < 16).
; NOTE(review): 4 pixels per 8-byte iteration, no scalar tail -- appears
; to assume w is a positive multiple of 4; confirm with the callers.
cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
add wd, wd ; element count -> byte count
movd mm7, maskd
SPLATW mm7, mm7 ; mm7 = mask in every 16-bit lane
; build lagged-by-one-pixel vectors: shift the first 4 words up one lane
; and inject the carried-in left_top / left values into lane 0
movq mm0, [src1q]
movq mm2, [src2q]
psllq mm0, 16
psllq mm2, 16
movd mm6, [left_topq]
por mm0, mm6 ; mm0 = src1[i-1 .. i+2] (lt lanes)
movd mm6, [leftq]
por mm2, mm6 ; mm2 = src2[i-1 .. i+2] (l lanes)
xor maskq, maskq ; mask register is dead now; reuse it as the byte index
.loop:
movq mm1, [src1q + maskq] ; current src1 pixels
movq mm3, [src2q + maskq] ; current src2 pixels
movq mm4, mm2 ; keep l for the median computation below
psubw mm2, mm0
paddw mm2, mm1
pand mm2, mm7 ; mm2 = gradient predictor (l + src1 - lt) & mask
movq mm5, mm4
pmaxsw mm4, mm1
pminsw mm1, mm5
pminsw mm4, mm2
pmaxsw mm4, mm1 ; mm4 = median(l, src1, gradient)
psubw mm3, mm4
pand mm3, mm7 ; residual = (src2 - pred) & mask
movq [dstq + maskq], mm3
add maskq, 8
; reload the lagged windows (one word behind the new index)
movq mm0, [src1q + maskq - 2]
movq mm2, [src2q + maskq - 2]
cmp maskq, wq
jb .loop
; write the last pixels back as the carry-out left_top / left state
movzx maskd, word [src1q + wq - 2]
mov [left_topq], maskd
movzx maskd, word [src2q + wq - 2]
mov [leftq], maskd
RET
......@@ -41,12 +41,9 @@ int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src,
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
......@@ -98,9 +95,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
c->add_bytes = ff_add_bytes_mmx;
c->add_int16 = ff_add_int16_mmx;
c->diff_int16 = ff_diff_int16_mmx;
}
if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
......@@ -111,7 +106,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
......@@ -119,7 +113,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
c->add_median_pred = ff_add_median_pred_sse2;
c->add_int16 = ff_add_int16_sse2;
c->diff_int16 = ff_diff_int16_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment