Commit 0724b4a1 authored by Michael Niedermayer

Merge commit '62844c3f'

* commit '62844c3f':
  h264: Integrate clear_blocks calls with IDCT

Conflicts:
	libavcodec/arm/h264idct_neon.S
	libavcodec/h264idct_template.c
	libavcodec/x86/h264_idct.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 944ad461 62844c3f
...@@ -187,8 +187,8 @@ endfunc ...@@ -187,8 +187,8 @@ endfunc
vshr.s16 q2, q10, #1 vshr.s16 q2, q10, #1
vadd.i16 q0, q8, q12 vadd.i16 q0, q8, q12
vld1.16 {q14-q15},[r1,:128] vld1.16 {q14-q15},[r1,:128]
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vsub.i16 q1, q8, q12 vsub.i16 q1, q8, q12
vshr.s16 q3, q14, #1 vshr.s16 q3, q14, #1
vsub.i16 q2, q2, q14 vsub.i16 q2, q2, q14
...@@ -267,16 +267,16 @@ endfunc ...@@ -267,16 +267,16 @@ endfunc
.endm .endm
function ff_h264_idct8_add_neon, export=1 function ff_h264_idct8_add_neon, export=1
vmov.i16 q7, #0 vmov.i16 q3, #0
vld1.16 {q8-q9}, [r1,:128] vld1.16 {q8-q9}, [r1,:128]
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vld1.16 {q10-q11},[r1,:128] vld1.16 {q10-q11},[r1,:128]
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vld1.16 {q12-q13},[r1,:128] vld1.16 {q12-q13},[r1,:128]
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
vst1.16 {q7}, [r1,:128]! vst1.16 {q3}, [r1,:128]!
idct8x8_cols 0 idct8x8_cols 0
idct8x8_cols 1 idct8x8_cols 1
......
...@@ -145,7 +145,7 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ ...@@ -145,7 +145,7 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
pixel *dst = (pixel*)_dst; pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block; dctcoef *block = (dctcoef*)_block;
int dc = (block[0] + 32) >> 6; int dc = (block[0] + 32) >> 6;
stride >>= sizeof(pixel)-1; stride /= sizeof(pixel);
block[0] = 0; block[0] = 0;
for( j = 0; j < 4; j++ ) for( j = 0; j < 4; j++ )
{ {
...@@ -161,7 +161,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){ ...@@ -161,7 +161,7 @@ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, int16_t *_block, int stride){
dctcoef *block = (dctcoef*)_block; dctcoef *block = (dctcoef*)_block;
int dc = (block[0] + 32) >> 6; int dc = (block[0] + 32) >> 6;
block[0] = 0; block[0] = 0;
stride >>= sizeof(pixel)-1; stride /= sizeof(pixel);
for( j = 0; j < 8; j++ ) for( j = 0; j < 8; j++ )
{ {
for( i = 0; i < 8; i++ ) for( i = 0; i < 8; i++ )
......
...@@ -312,7 +312,7 @@ INIT_MMX mmxext ...@@ -312,7 +312,7 @@ INIT_MMX mmxext
%if ARCH_X86_64 %if ARCH_X86_64
cglobal h264_idct_dc_add_8, 3, 4, 0 cglobal h264_idct_dc_add_8, 3, 4, 0
movsx r3, word [r1] movsx r3, word [r1]
mov word [r1], 0 mov dword [r1], 0
DC_ADD_MMXEXT_INIT r3, r2 DC_ADD_MMXEXT_INIT r3, r2
DC_ADD_MMXEXT_OP movh, r0, r2, r3 DC_ADD_MMXEXT_OP movh, r0, r2, r3
RET RET
...@@ -320,7 +320,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0 ...@@ -320,7 +320,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_8, 3, 4, 0 cglobal h264_idct8_dc_add_8, 3, 4, 0
movsx r3, word [r1] movsx r3, word [r1]
mov word [r1], 0 mov dword [r1], 0
DC_ADD_MMXEXT_INIT r3, r2 DC_ADD_MMXEXT_INIT r3, r2
DC_ADD_MMXEXT_OP mova, r0, r2, r3 DC_ADD_MMXEXT_OP mova, r0, r2, r3
lea r0, [r0+r2*4] lea r0, [r0+r2*4]
...@@ -329,7 +329,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0 ...@@ -329,7 +329,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
%else %else
cglobal h264_idct_dc_add_8, 2, 3, 0 cglobal h264_idct_dc_add_8, 2, 3, 0
movsx r2, word [r1] movsx r2, word [r1]
mov word [r1], 0 mov dword [r1], 0
mov r1, r2m mov r1, r2m
DC_ADD_MMXEXT_INIT r2, r1 DC_ADD_MMXEXT_INIT r2, r1
DC_ADD_MMXEXT_OP movh, r0, r1, r2 DC_ADD_MMXEXT_OP movh, r0, r1, r2
...@@ -338,7 +338,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0 ...@@ -338,7 +338,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct8_dc_add_mmxext(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_8, 2, 3, 0 cglobal h264_idct8_dc_add_8, 2, 3, 0
movsx r2, word [r1] movsx r2, word [r1]
mov word [r1], 0 mov dword [r1], 0
mov r1, r2m mov r1, r2m
DC_ADD_MMXEXT_INIT r2, r1 DC_ADD_MMXEXT_INIT r2, r1
DC_ADD_MMXEXT_OP mova, r0, r1, r2 DC_ADD_MMXEXT_OP mova, r0, r1, r2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment