Commit 0724b4a1 authored by Michael Niedermayer

Merge commit '62844c3f'

* commit '62844c3f':
  h264: Integrate clear_blocks calls with IDCT

Conflicts:
	libavcodec/arm/h264idct_neon.S
	libavcodec/h264idct_template.c
	libavcodec/x86/h264_idct.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 944ad461 62844c3f
......@@ -187,8 +187,8 @@ endfunc
vshr.s16 q2, q10, #1
vadd.i16 q0, q8, q12
vld1.16 {q14-q15},[r1,:128]
vst1.16 {q7}, [r1,:128]!
vst1.16 {q7}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vsub.i16 q1, q8, q12
vshr.s16 q3, q14, #1
vsub.i16 q2, q2, q14
......@@ -267,16 +267,16 @@ endfunc
.endm
function ff_h264_idct8_add_neon, export=1
vmov.i16 q7, #0
vmov.i16 q3, #0
vld1.16 {q8-q9}, [r1,:128]
vst1.16 {q7}, [r1,:128]!
vst1.16 {q7}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vld1.16 {q10-q11},[r1,:128]
vst1.16 {q7}, [r1,:128]!
vst1.16 {q7}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vld1.16 {q12-q13},[r1,:128]
vst1.16 {q7}, [r1,:128]!
vst1.16 {q7}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
vst1.16 {q3}, [r1,:128]!
idct8x8_cols 0
idct8x8_cols 1
......
......@@ -145,7 +145,7 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block;
int dc = (block[0] + 32) >> 6;
stride >>= sizeof(pixel)-1;
stride /= sizeof(pixel);
block[0] = 0;
for( j = 0; j < 4; j++ )
{
......@@ -161,7 +161,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
dctcoef *block = (dctcoef*)_block;
int dc = (block[0] + 32) >> 6;
block[0] = 0;
stride >>= sizeof(pixel)-1;
stride /= sizeof(pixel);
for( j = 0; j < 8; j++ )
{
for( i = 0; i < 8; i++ )
......
......@@ -312,7 +312,7 @@ INIT_MMX mmxext
%if ARCH_X86_64
; h264_idct_dc_add_8, variant assembled when ARCH_X86_64 is set (see the
; enclosing %if). Presumably the DC-only IDCT add for 8-bit samples, judging
; by the name -- confirm against the C prototype.
; NOTE(review): this listing is a rendered diff whose +/- markers were
; stripped, so the two consecutive stores to [r1] below look like the
; before/after versions of a single line rather than two instructions that
; coexist in the real file -- confirm against libavcodec/x86/h264_idct.asm.
cglobal h264_idct_dc_add_8, 3, 4, 0
; r3 = 16-bit value at [r1], sign-extended to full register width.
movsx r3, word [r1]
; Zero the 16-bit value that was just read.
mov word [r1], 0
; Zero the 32 bits starting at [r1] (the word above plus the following word).
mov dword [r1], 0
; Both macros are defined outside this excerpt; they are handed the loaded
; value (r3) together with r0 and r2.
DC_ADD_MMXEXT_INIT r3, r2
DC_ADD_MMXEXT_OP movh, r0, r2, r3
RET
......@@ -320,7 +320,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_8, 3, 4, 0
movsx r3, word [r1]
mov word [r1], 0
mov dword [r1], 0
DC_ADD_MMXEXT_INIT r3, r2
DC_ADD_MMXEXT_OP mova, r0, r2, r3
lea r0, [r0+r2*4]
......@@ -329,7 +329,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
%else
cglobal h264_idct_dc_add_8, 2, 3, 0
movsx r2, word [r1]
mov word [r1], 0
mov dword [r1], 0
mov r1, r2m
DC_ADD_MMXEXT_INIT r2, r1
DC_ADD_MMXEXT_OP movh, r0, r1, r2
......@@ -338,7 +338,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_8, 2, 3, 0
movsx r2, word [r1]
mov word [r1], 0
mov dword [r1], 0
mov r1, r2m
DC_ADD_MMXEXT_INIT r2, r1
DC_ADD_MMXEXT_OP mova, r0, r1, r2
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment