Commit ef00ef75 authored by Michael Niedermayer

avcodec/x86/lossless_videodsp: port sub_hfyu_median_prediction_int16 to yasm

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent fad49aae
......@@ -299,3 +299,42 @@ cglobal add_hfyu_median_prediction_int16, 7,7,0, dst, top, diff, mask, w, left,
movzx r2d, word [topq-2]
mov [left_topq], r2d
RET
;------------------------------------------------------------------------------
; void ff_sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst,
;       const uint16_t *src1, const uint16_t *src2, unsigned mask, int w,
;       int *left, int *left_top)
;
; dst[i] = (src2[i] - median(L, T, (L + T - LT) & mask)) & mask, where
; T = src1[i], LT = src1[i-1], L = src2[i-1]; the i==0 neighbours are seeded
; from *left / *left_top, and the final column (src1[w-1], src2[w-1]) is
; written back to *left_top / *left for the next call.
; Processes 4 words per iteration; assumes w is a multiple of 4 — TODO confirm
; against the callers.
;------------------------------------------------------------------------------
cglobal sub_hfyu_median_prediction_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
add wq, wq                          ; wq = w * 2 = byte length of a row
movd mm7, maskd
SPLATW mm7, mm7                     ; mm7 = mask broadcast to all 4 words
movq mm0, [src1q]
movq mm2, [src2q]
psllq mm0, 16                       ; shift up to make room for the seed word
psllq mm2, 16
movd mm6, [left_topq]
por mm0, mm6                        ; mm0 = LT lane: { *left_top, src1[0..2] }
movd mm6, [leftq]
por mm2, mm6                        ; mm2 = L  lane: { *left,     src2[0..2] }
xor maskq, maskq                    ; maskd no longer needed; reuse as byte index
.loop:
movq mm1, [src1q + maskq]           ; mm1 = T (top row)
movq mm3, [src2q + maskq]           ; mm3 = X (current row)
movq mm4, mm2                       ; keep a copy of L
psubw mm2, mm0
paddw mm2, mm1                      ; mm2 = L + T - LT (gradient predictor)
pand mm2, mm7                       ; ... reduced by mask
movq mm5, mm4                       ; mm5 = L
pmaxsw mm4, mm1                     ; mm4 = max(T, L)
pminsw mm1, mm5                     ; mm1 = min(T, L)
pminsw mm4, mm2
pmaxsw mm4, mm1                     ; mm4 = median(L, T, L+T-LT)
psubw mm3, mm4                      ; residual = X - pred
pand mm3, mm7
movq [dstq + maskq], mm3
add maskq, 8
movq mm0, [src1q + maskq - 2]       ; next LT = src1, one word behind
movq mm2, [src2q + maskq - 2]       ; next L  = src2, one word behind
cmp maskq, wq
jb .loop
; movzx word: a plain dword load here would read 2 bytes past the end of the
; row and leave garbage in the high half of *left_top / *left (the sibling
; add_hfyu function uses the same movzx-word pattern).
movzx maskd, word [src1q + wq - 2]
mov [left_topq], maskd              ; *left_top = src1[w-1]
movzx maskd, word [src2q + wq - 2]
mov [leftq], maskd                  ; *left = src2[w-1]
RET
......@@ -20,8 +20,6 @@
#include "../lossless_videodsp.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavcodec/mathops.h"
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
......@@ -30,51 +28,8 @@ void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src
int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
void ff_sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
/**
 * Subtract median prediction, 16bpp variant (GCC inline MMXEXT asm).
 *
 * For each i: dst[i] = (src2[i] - median(L, T, (L + T - LT) & mask)) & mask,
 * with T = src1[i], LT = src1[i-1], L = src2[i-1].  dst[0] is recomputed in
 * plain C below, seeded from *left / *left_top, and the last column is
 * stored back into *left / *left_top for the next call.
 *
 * NOTE(review): the first loop iteration reads one word before src2
 * ("-2(%2, %0)" at i == 0); dst[0] is overwritten afterwards, but the load
 * itself still touches src2[-1] — confirm callers guarantee this is safe.
 */
static void sub_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *src1,
const uint16_t *src2, unsigned mask, int w,
int *left, int *left_top)
{
x86_reg i=0;
uint16_t l, lt;
__asm__ volatile(
"movd %5, %%mm7 \n\t"
"pshufw $0, %%mm7, %%mm7 \n\t" /* mm7 = mask broadcast to all 4 words */
"movq (%1, %0), %%mm0 \n\t" // LT
"psllq $16, %%mm0 \n\t"
"1: \n\t"
"movq (%1, %0), %%mm1 \n\t" // T
"movq -2(%2, %0), %%mm2 \n\t" // L
"movq (%2, %0), %%mm3 \n\t" // X
"movq %%mm2, %%mm4 \n\t" // L
"psubw %%mm0, %%mm2 \n\t"
"paddw %%mm1, %%mm2 \n\t" // L + T - LT
"pand %%mm7, %%mm2 \n\t"
"movq %%mm4, %%mm5 \n\t" // L
"pmaxsw %%mm1, %%mm4 \n\t" // max(T, L)
"pminsw %%mm5, %%mm1 \n\t" // min(T, L)
"pminsw %%mm2, %%mm4 \n\t"
"pmaxsw %%mm1, %%mm4 \n\t" /* mm4 = median(L, T, L+T-LT) */
"psubw %%mm4, %%mm3 \n\t" // dst - pred
"pand %%mm7, %%mm3 \n\t"
"movq %%mm3, (%3, %0) \n\t"
"add $8, %0 \n\t"
"movq -2(%1, %0), %%mm0 \n\t" // LT
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (i)
: "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)2*w), "rm"(mask)
);
/* Fix up element 0 in C: the asm used src1[-1]/src2[-1] as LT/L there,
 * but the real seeds live in *left_top / *left. */
l= *left;
lt= *left_top;
dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&mask);
/* Carry the last column over to the next call. */
*left_top= src1[w-1];
*left = src2[w-1];
}
void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
......@@ -87,7 +42,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
c->sub_hfyu_median_prediction_int16 = sub_hfyu_median_prediction_int16_mmxext;
c->sub_hfyu_median_prediction_int16 = ff_sub_hfyu_median_prediction_int16_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment