Commit cf061a9c authored by Michael Niedermayer

Merge commit 'aeaf268e'

* commit 'aeaf268e':
  vp3: integrate clear_blocks with idct of previous block.
  mpegvideo: fix loop condition in draw_line()
  dvdsubdec: parse the size from the extradata

Conflicts:
	libavcodec/dvdsubdec.c
	libavcodec/mpegvideo.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents b3b456b2 aeaf268e
...@@ -108,14 +108,20 @@ endfunc ...@@ -108,14 +108,20 @@ endfunc
function vp3_idct_start_neon function vp3_idct_start_neon
vpush {d8-d15} vpush {d8-d15}
vmov.i16 q4, #0
vmov.i16 q5, #0
movrel r3, vp3_idct_constants movrel r3, vp3_idct_constants
vld1.64 {d0-d1}, [r3,:128] vld1.64 {d0-d1}, [r3,:128]
vld1.64 {d16-d19}, [r2,:128]! vld1.64 {d16-d19}, [r2,:128]
vld1.64 {d20-d23}, [r2,:128]! vst1.64 {q4-q5}, [r2,:128]!
vld1.64 {d24-d27}, [r2,:128]! vld1.64 {d20-d23}, [r2,:128]
vst1.64 {q4-q5}, [r2,:128]!
vld1.64 {d24-d27}, [r2,:128]
vst1.64 {q4-q5}, [r2,:128]!
vadd.s16 q1, q8, q12 vadd.s16 q1, q8, q12
vsub.s16 q8, q8, q12 vsub.s16 q8, q8, q12
vld1.64 {d28-d31}, [r2,:128]! vld1.64 {d28-d31}, [r2,:128]
vst1.64 {q4-q5}, [r2,:128]!
vp3_idct_core_neon: vp3_idct_core_neon:
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16 vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
...@@ -345,10 +351,12 @@ function ff_vp3_idct_add_neon, export=1 ...@@ -345,10 +351,12 @@ function ff_vp3_idct_add_neon, export=1
endfunc endfunc
function ff_vp3_idct_dc_add_neon, export=1 function ff_vp3_idct_dc_add_neon, export=1
ldrsh r2, [r2] ldrsh r12, [r2]
mov r3, r0 mov r3, r0
add r2, r2, #15 add r12, r12, #15
vdup.16 q15, r2 vdup.16 q15, r12
mov r12, 0
strh r12, [r2]
vshr.s16 q15, q15, #5 vshr.s16 q15, q15, #5
vld1.8 {d0}, [r0,:64], r1 vld1.8 {d0}, [r0,:64], r1
......
...@@ -1666,7 +1666,7 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, ...@@ -1666,7 +1666,7 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
buf += sx + sy * stride; buf += sx + sy * stride;
ex -= sx; ex -= sx;
f = ((ey - sy) << 16) / ex; f = ((ey - sy) << 16) / ex;
for(x= 0; x <= ex; x++){ for (x = 0; x <= ex; x++) {
y = (x * f) >> 16; y = (x * f) >> 16;
fr = (x * f) & 0xFFFF; fr = (x * f) & 0xFFFF;
buf[y * stride + x] += (color * (0x10000 - fr)) >> 16; buf[y * stride + x] += (color * (0x10000 - fr)) >> 16;
......
...@@ -140,6 +140,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) ...@@ -140,6 +140,7 @@ static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
PUT(b5) dst += stride; PUT(b5) dst += stride;
PUT(b6) dst += stride; PUT(b6) dst += stride;
PUT(b7) PUT(b7)
memset(block, 0, sizeof(*block) * 64);
} }
static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
...@@ -171,6 +172,7 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) ...@@ -171,6 +172,7 @@ static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
ADD(b5) dst += stride; ADD(b5) dst += stride;
ADD(b6) dst += stride; ADD(b6) dst += stride;
ADD(b7) ADD(b7)
memset(block, 0, sizeof(*block) * 64);
} }
#endif /* HAVE_ALTIVEC */ #endif /* HAVE_ALTIVEC */
......
...@@ -138,6 +138,7 @@ typedef struct Vp3DecodeContext { ...@@ -138,6 +138,7 @@ typedef struct Vp3DecodeContext {
DSPContext dsp; DSPContext dsp;
VideoDSPContext vdsp; VideoDSPContext vdsp;
VP3DSPContext vp3dsp; VP3DSPContext vp3dsp;
DECLARE_ALIGNED(16, DCTELEM, block)[64];
int flipped_image; int flipped_image;
int last_slice_end; int last_slice_end;
int skip_loop_filter; int skip_loop_filter;
...@@ -1458,7 +1459,7 @@ static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int ...@@ -1458,7 +1459,7 @@ static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int
static void render_slice(Vp3DecodeContext *s, int slice) static void render_slice(Vp3DecodeContext *s, int slice)
{ {
int x, y, i, j, fragment; int x, y, i, j, fragment;
LOCAL_ALIGNED_16(DCTELEM, block, [64]); DCTELEM *block = s->block;
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
int motion_halfpel_index; int motion_halfpel_index;
uint8_t *motion_source; uint8_t *motion_source;
...@@ -1571,8 +1572,6 @@ static void render_slice(Vp3DecodeContext *s, int slice) ...@@ -1571,8 +1572,6 @@ static void render_slice(Vp3DecodeContext *s, int slice)
} }
} }
s->dsp.clear_block(block);
/* invert DCT and place (or add) in final output */ /* invert DCT and place (or add) in final output */
if (s->all_fragments[i].coding_method == MODE_INTRA) { if (s->all_fragments[i].coding_method == MODE_INTRA) {
......
...@@ -215,14 +215,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int ...@@ -215,14 +215,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 1); idct(dest, line_size, block, 1);
memset(block, 0, sizeof(*block) * 64);
} }
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 2); idct(dest, line_size, block, 2);
memset(block, 0, sizeof(*block) * 64);
} }
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
const DCTELEM *block/*align 16*/){ DCTELEM *block/*align 16*/){
int i, dc = (block[0] + 15) >> 5; int i, dc = (block[0] + 15) >> 5;
for(i = 0; i < 8; i++){ for(i = 0; i < 8; i++){
...@@ -236,6 +238,7 @@ static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, ...@@ -236,6 +238,7 @@ static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
dest[7] = av_clip_uint8(dest[7] + dc); dest[7] = av_clip_uint8(dest[7] + dc);
dest += line_size; dest += line_size;
} }
block[0] = 0;
} }
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
typedef struct VP3DSPContext { typedef struct VP3DSPContext {
void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block); void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block);
void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
......
...@@ -561,6 +561,13 @@ cglobal vp3_idct_put, 3, 4, 9 ...@@ -561,6 +561,13 @@ cglobal vp3_idct_put, 3, 4, 9
movhps [r0+r3 ], m3 movhps [r0+r3 ], m3
%endif %endif
%assign %%i %%i+64 %assign %%i %%i+64
%endrep
pxor m0, m0
%assign %%offset 0
%rep 128/mmsize
mova [r2+%%offset], m0
%assign %%offset %%offset+mmsize
%endrep %endrep
RET RET
...@@ -600,6 +607,11 @@ cglobal vp3_idct_add, 3, 4, 9 ...@@ -600,6 +607,11 @@ cglobal vp3_idct_add, 3, 4, 9
movhps [r0+r1], m0 movhps [r0+r1], m0
%endif %endif
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
%assign %%offset 0
%rep 32/mmsize
mova [r2+%%offset], m4
%assign %%offset %%offset+mmsize
%endrep
add r2, 32 add r2, 32
dec r3 dec r3
jg .loop jg .loop
...@@ -620,7 +632,7 @@ vp3_idct_funcs ...@@ -620,7 +632,7 @@ vp3_idct_funcs
paddusb m2, m0 paddusb m2, m0
movq m4, [r0+r1*2] movq m4, [r0+r1*2]
paddusb m3, m0 paddusb m3, m0
movq m5, [r0+r3 ] movq m5, [r0+r2 ]
paddusb m4, m0 paddusb m4, m0
paddusb m5, m0 paddusb m5, m0
psubusb m2, m1 psubusb m2, m1
...@@ -630,7 +642,7 @@ vp3_idct_funcs ...@@ -630,7 +642,7 @@ vp3_idct_funcs
movq [r0+r1 ], m3 movq [r0+r1 ], m3
psubusb m5, m1 psubusb m5, m1
movq [r0+r1*2], m4 movq [r0+r1*2], m4
movq [r0+r3 ], m5 movq [r0+r2 ], m5
%endmacro %endmacro
INIT_MMX mmxext INIT_MMX mmxext
...@@ -638,11 +650,12 @@ cglobal vp3_idct_dc_add, 3, 4 ...@@ -638,11 +650,12 @@ cglobal vp3_idct_dc_add, 3, 4
%if ARCH_X86_64 %if ARCH_X86_64
movsxd r1, r1d movsxd r1, r1d
%endif %endif
lea r3, [r1*3] movsx r3, word [r2]
movsx r2, word [r2] mov word [r2], 0
add r2, 15 lea r2, [r1*3]
sar r2, 5 add r3, 15
movd m0, r2d sar r3, 5
movd m0, r3d
pshufw m0, m0, 0x0 pshufw m0, m0, 0x0
pxor m1, m1 pxor m1, m1
psubw m1, m0 psubw m1, m0
......
...@@ -32,7 +32,7 @@ void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); ...@@ -32,7 +32,7 @@ void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size, void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
const DCTELEM *block); DCTELEM *block);
void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride, void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
int *bounding_values); int *bounding_values);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment