Commit aedc9086 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  flvdec: Do not call parse_keyframes_index with a NULL stream
  libspeexdec: include system headers before local headers
  libspeexdec: return meaningful error codes
  libspeexdec: cosmetics: reindent
  libspeexdec: decode one frame at a time.
  swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables()
  Move timefilter code from lavf to lavd.
  mov: add support for hdvd and pgapmetadata atoms
  mov: rename function _stik, some indentation cosmetics
  mov: rename function _int8 to remove ambiguity, some indentation cosmetics
  mov: parse the gnre atom
  mp3on4: check for allocation failures in decode_init_mp3on4()
  mp3on4: create a separate flush function for MP3onMP4.
  mp3on4: ensure that the frame channel count does not exceed the codec channel count.
  mp3on4: set channel layout
  mp3on4: fix the output channel order
  mp3on4: allocate temp buffer with av_malloc() instead of on the stack.
  mp3on4: copy MPADSPContext from first context to all contexts.
  fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
  fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
  ...

Conflicts:
	libavcodec/arm/h264dsp_init_arm.c
	libavcodec/h264.c
	libavcodec/h264.h
	libavcodec/h264_cabac.c
	libavcodec/h264_cavlc.c
	libavcodec/h264_ps.c
	libavcodec/h264dsp_template.c
	libavcodec/h264idct_template.c
	libavcodec/h264pred.c
	libavcodec/h264pred_template.c
	libavcodec/x86/h264dsp_mmx.c
	libavdevice/Makefile
	libavdevice/jack_audio.c
	libavformat/Makefile
	libavformat/flvdec.c
	libavformat/flvenc.c
	libavutil/pixfmt.h
	libswscale/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 1a7090bf f4b51d06
...@@ -67,6 +67,7 @@ easier to use. The changes are: ...@@ -67,6 +67,7 @@ easier to use. The changes are:
- aevalsrc audio source added - aevalsrc audio source added
- Ut Video decoder - Ut Video decoder
- Speex encoding via libspeex - Speex encoding via libspeex
- 4:2:2 H.264 decoding support
version 0.8: version 0.8:
......
...@@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, ...@@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0); int beta, int8_t *tc0);
void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
...@@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i ...@@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
} }
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
c->h264_idct_add = ff_h264_idct_add_neon; c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
......
...@@ -1592,7 +1592,7 @@ endfunc ...@@ -1592,7 +1592,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q2, q8 vmov q2, q8
vmov q3, q8 vmov q3, q8
1: subs ip, ip, #2 1: subs r3, r3, #2
vld1.8 {d20-d21},[r0,:128], r2 vld1.8 {d20-d21},[r0,:128], r2
\macd q2, d0, d20 \macd q2, d0, d20
pld [r0] pld [r0]
...@@ -1632,7 +1632,7 @@ endfunc ...@@ -1632,7 +1632,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #2 1: subs r3, r3, #2
vld1.8 {d4},[r0,:64], r2 vld1.8 {d4},[r0,:64], r2
\macd q1, d0, d4 \macd q1, d0, d4
pld [r0] pld [r0]
...@@ -1662,7 +1662,7 @@ endfunc ...@@ -1662,7 +1662,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4 1: subs r3, r3, #4
vld1.32 {d4[0]},[r0,:32], r2 vld1.32 {d4[0]},[r0,:32], r2
vld1.32 {d4[1]},[r0,:32], r2 vld1.32 {d4[1]},[r0,:32], r2
\macd q1, d0, d4 \macd q1, d0, d4
...@@ -1700,16 +1700,17 @@ endfunc ...@@ -1700,16 +1700,17 @@ endfunc
.endm .endm
.macro biweight_func w .macro biweight_func w
function biweight_h264_pixels_\w\()_neon function ff_biweight_h264_pixels_\w\()_neon, export=1
push {r4-r6, lr} push {r4-r6, lr}
add r4, sp, #16 ldr r12, [sp, #16]
add r4, sp, #20
ldm r4, {r4-r6} ldm r4, {r4-r6}
lsr lr, r4, #31 lsr lr, r4, #31
add r6, r6, #1 add r6, r6, #1
eors lr, lr, r5, lsr #30 eors lr, lr, r5, lsr #30
orr r6, r6, #1 orr r6, r6, #1
vdup.16 q9, r3 vdup.16 q9, r12
lsl r6, r6, r3 lsl r6, r6, r12
vmvn q9, q9 vmvn q9, q9
vdup.16 q8, r6 vdup.16 q8, r6
mov r6, r0 mov r6, r0
...@@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon ...@@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon
endfunc endfunc
.endm .endm
.macro biweight_entry w, h, b=1
function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
biweight_entry 16, 8
biweight_entry 16, 16, b=0
biweight_func 16 biweight_func 16
biweight_entry 8, 16
biweight_entry 8, 4
biweight_entry 8, 8, b=0
biweight_func 8 biweight_func 8
biweight_entry 4, 8
biweight_entry 4, 2
biweight_entry 4, 4, b=0
biweight_func 4 biweight_func 4
@ Weighted prediction @ Weighted prediction
.macro weight_16 add .macro weight_16 add
vdup.8 d0, r3 vdup.8 d0, r12
1: subs ip, ip, #2 1: subs r2, r2, #2
vld1.8 {d20-d21},[r0,:128], r1 vld1.8 {d20-d21},[r0,:128], r1
vmull.u8 q2, d0, d20 vmull.u8 q2, d0, d20
pld [r0] pld [r0]
...@@ -1785,8 +1767,8 @@ endfunc ...@@ -1785,8 +1767,8 @@ endfunc
.endm .endm
.macro weight_8 add .macro weight_8 add
vdup.8 d0, r3 vdup.8 d0, r12
1: subs ip, ip, #2 1: subs r2, r2, #2
vld1.8 {d4},[r0,:64], r1 vld1.8 {d4},[r0,:64], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
pld [r0] pld [r0]
...@@ -1806,10 +1788,10 @@ endfunc ...@@ -1806,10 +1788,10 @@ endfunc
.endm .endm
.macro weight_4 add .macro weight_4 add
vdup.8 d0, r3 vdup.8 d0, r12
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4 1: subs r2, r2, #4
vld1.32 {d4[0]},[r0,:32], r1 vld1.32 {d4[0]},[r0,:32], r1
vld1.32 {d4[1]},[r0,:32], r1 vld1.32 {d4[1]},[r0,:32], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
...@@ -1842,50 +1824,32 @@ endfunc ...@@ -1842,50 +1824,32 @@ endfunc
.endm .endm
.macro weight_func w .macro weight_func w
function weight_h264_pixels_\w\()_neon function ff_weight_h264_pixels_\w\()_neon, export=1
push {r4, lr} push {r4, lr}
ldr r4, [sp, #8] ldr r12, [sp, #8]
cmp r2, #1 ldr r4, [sp, #12]
lsl r4, r4, r2 cmp r3, #1
lsl r4, r4, r3
vdup.16 q8, r4 vdup.16 q8, r4
mov r4, r0 mov r4, r0
ble 20f ble 20f
rsb lr, r2, #1 rsb lr, r3, #1
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0 cmp r12, #0
blt 10f blt 10f
weight_\w vhadd.s16 weight_\w vhadd.s16
10: rsb r3, r3, #0 10: rsb r12, r12, #0
weight_\w vhsub.s16 weight_\w vhsub.s16
20: rsb lr, r2, #0 20: rsb lr, r3, #0
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0 cmp r12, #0
blt 10f blt 10f
weight_\w vadd.s16 weight_\w vadd.s16
10: rsb r3, r3, #0 10: rsb r12, r12, #0
weight_\w vsub.s16 weight_\w vsub.s16
endfunc endfunc
.endm .endm
.macro weight_entry w, h, b=1
function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b weight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
weight_entry 16, 8
weight_entry 16, 16, b=0
weight_func 16 weight_func 16
weight_entry 8, 16
weight_entry 8, 4
weight_entry 8, 8, b=0
weight_func 8 weight_func 8
weight_entry 4, 8
weight_entry 4, 2
weight_entry 4, 4, b=0
weight_func 4 weight_func 4
...@@ -70,7 +70,15 @@ typedef struct FmtConvertContext { ...@@ -70,7 +70,15 @@ typedef struct FmtConvertContext {
long len, int channels); long len, int channels);
/** /**
* Convert an array of interleaved float to multiple arrays of float. * Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/ */
void (*float_interleave)(float *dst, const float **src, unsigned int len, void (*float_interleave)(float *dst, const float **src, unsigned int len,
int channels); int channels);
......
This diff is collapsed.
...@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { ...@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
}; };
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
int is_dc, int chroma422)
{
static const int significant_coeff_flag_offset[2][14] = { static const int significant_coeff_flag_offset[2][14] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
...@@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
* map node ctx => cabac ctx for level=1 */ * map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */ /* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
{ 5, 5, 5, 5, 6, 7, 8, 9 },
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
};
static const uint8_t coeff_abs_level_transition[2][8] = { static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after decoding a level=1 */ /* update node ctx after decoding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 }, { 1, 2, 3, 3, 4, 5, 6, 7 },
...@@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off); last_coeff_ctx_base, sig_off);
} else { } else {
if (is_dc && max_coeff == 8) { // dc 422 if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else { } else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
...@@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#else #else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
} else { } else {
if (is_dc && max_coeff == 8) { // dc 422 if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else { } else {
DECODE_SIGNIFICANCE(max_coeff - 1, last, last); DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
...@@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} \ } \
} else { \ } else { \
int coeff_abs = 2; \ int coeff_abs = 2; \
if (is_dc && max_coeff == 8) \ ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
node_ctx = FFMIN(node_ctx, 6); \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \ node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\ \
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
...@@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} }
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
}
static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1);
} }
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
} }
/* cat: 0-> DC 16x16 n = 0 /* cat: 0-> DC 16x16 n = 0
...@@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * ...@@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
} }
static av_always_inline void
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
/* read coded block flag */
if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) {
h->non_zero_count_cache[scan8[n]] = 0;
return;
}
decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff);
}
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* read coded block flag */ /* read coded block flag */
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
...@@ -2325,17 +2351,14 @@ decode_intra_mb: ...@@ -2325,17 +2351,14 @@ decode_intra_mb:
if(CHROMA444){ if(CHROMA444){
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
} else { } else if (CHROMA422) {
const int num_c8x8 = h->sps.chroma_format_idc;
if( cbp&0x30 ){ if( cbp&0x30 ){
int c; int c;
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX+c, CHROMA_DC_BLOCK_INDEX + c,
CHROMA422 ? chroma422_dc_scan : chroma_dc_scan, chroma422_dc_scan, 8);
4*num_c8x8);
} }
} }
...@@ -2344,7 +2367,7 @@ decode_intra_mb: ...@@ -2344,7 +2367,7 @@ decode_intra_mb:
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) { for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
const int index = 16 + 16 * c + 8*i8x8 + i; const int index = 16 + 16 * c + 8*i8x8 + i;
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
...@@ -2357,6 +2380,29 @@ decode_intra_mb: ...@@ -2357,6 +2380,29 @@ decode_intra_mb:
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
} }
} else /* yuv420 */ {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 16 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} }
} else { } else {
fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
......
...@@ -415,7 +415,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ ...@@ -415,7 +415,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
#endif #endif
sps->crop= get_bits1(&s->gb); sps->crop= get_bits1(&s->gb);
if(sps->crop){ if(sps->crop){
int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8;
int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8;
sps->crop_left = get_ue_golomb(&s->gb); sps->crop_left = get_ue_golomb(&s->gb);
sps->crop_right = get_ue_golomb(&s->gb); sps->crop_right = get_ue_golomb(&s->gb);
......
...@@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo ...@@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
else\ else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\ \
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\ c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\ c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\ c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\ c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\ c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\ c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
\ \
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
......
...@@ -31,16 +31,18 @@ ...@@ -31,16 +31,18 @@
#include "dsputil.h" #include "dsputil.h"
//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); //typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset);
/** /**
* Context for storing H.264 DSP functions * Context for storing H.264 DSP functions
*/ */
typedef struct H264DSPContext{ typedef struct H264DSPContext{
/* weighted MC */ /* weighted MC */
h264_weight_func weight_h264_pixels_tab[10]; h264_weight_func weight_h264_pixels_tab[4];
h264_biweight_func biweight_h264_pixels_tab[10]; h264_biweight_func biweight_h264_pixels_tab[4];
/* loop filter */ /* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
......
...@@ -29,14 +29,16 @@ ...@@ -29,14 +29,16 @@
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \ #define H264_WEIGHT(W) \
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int stride, int log2_denom, int weight, int offset){ \ static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
int log2_denom, int weight, int offset) \
{ \
int y; \ int y; \
pixel *block = (pixel*)p_block; \ pixel *block = (pixel*)_block; \
stride >>= sizeof(pixel)-1; \ stride >>= sizeof(pixel)-1; \
offset <<= (log2_denom + (BIT_DEPTH-8)); \ offset <<= (log2_denom + (BIT_DEPTH-8)); \
if(log2_denom) offset += 1<<(log2_denom-1); \ if(log2_denom) offset += 1<<(log2_denom-1); \
for(y=0; y<H; y++, block += stride){ \ for (y = 0; y < height; y++, block += stride) { \
op_scale1(0); \ op_scale1(0); \
op_scale1(1); \ op_scale1(1); \
if(W==2) continue; \ if(W==2) continue; \
...@@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid ...@@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid
op_scale1(15); \ op_scale1(15); \
} \ } \
} \ } \
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \ static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset) \
{ \
int y; \ int y; \
pixel *dst = (pixel*)_dst; \ pixel *dst = (pixel*)_dst; \
pixel *src = (pixel*)_src; \ pixel *src = (pixel*)_src; \
stride >>= sizeof(pixel)-1; \ stride >>= sizeof(pixel)-1; \
offset <<= (BIT_DEPTH-8); \ offset <<= (BIT_DEPTH-8); \
offset = ((offset + 1) | 1) << log2_denom; \ offset = ((offset + 1) | 1) << log2_denom; \
for(y=0; y<H; y++, dst += stride, src += stride){ \ for (y = 0; y < height; y++, dst += stride, src += stride) { \
op_scale2(0); \ op_scale2(0); \
op_scale2(1); \ op_scale2(1); \
if(W==2) continue; \ if(W==2) continue; \
...@@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_ ...@@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
} \ } \
} }
H264_WEIGHT(16,16) H264_WEIGHT(16)
H264_WEIGHT(16,8) H264_WEIGHT(8)
H264_WEIGHT(8,16) H264_WEIGHT(4)
H264_WEIGHT(8,8) H264_WEIGHT(2)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)
#undef op_scale1 #undef op_scale1
#undef op_scale2 #undef op_scale2
......
...@@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * ...@@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
int i, j; int i, j;
#if 0
av_log(NULL, AV_LOG_INFO, "idct\n");
int32_t *b = block;
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
for(j=1; j<3; j++){ for(j=1; j<3; j++){
for(i=j*16; i<j*16+4; i++){ for(i=j*16; i<j*16+4; i++){
if(nnzc[ scan8[i] ]) if(nnzc[ scan8[i] ])
...@@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in ...@@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
#undef stride #undef stride
} }
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2; const int stride= 16*2;
const int xStride= 16; const int xStride= 16;
int i; int i;
int temp[8]; int temp[8];
static const uint8_t x_offset[2]={0, 16}; static const uint8_t x_offset[2]={0, 16};
dctcoef *block = (dctcoef*)p_block; dctcoef *block = (dctcoef*)_block;
for(i=0; i<4; i++){ for(i=0; i<4; i++){
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
...@@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ ...@@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
} }
#if 0
av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
} }
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){ void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2; const int stride= 16*2;
const int xStride= 16; const int xStride= 16;
int a,b,c,d,e; int a,b,c,d,e;
dctcoef *block = (dctcoef*)p_block; dctcoef *block = (dctcoef*)_block;
a= block[stride*0 + xStride*0]; a= block[stride*0 + xStride*0];
b= block[stride*0 + xStride*1]; b= block[stride*0 + xStride*1];
......
...@@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co ...@@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
}\ }\
}else{\ }else{\
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
...@@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co ...@@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
if (chroma_format_idc == 1) {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
} else {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\
}\
h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\
......
...@@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ ...@@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred4x4_dc)(src, NULL, stride); FUNCC(pred4x4_dc)(src, NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_l0t: runs the 8x16 top-DC predictor on
 * the block, then the 4x4 DC predictor at src.
 * The order of the two calls is kept as is; the second call targets the same
 * start address as the first (presumably overwriting its first 4x4 block).
 */
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride)
{
    FUNCC(pred8x16_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
FUNCC(pred8x8_dc)(src, stride); FUNCC(pred8x8_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride); FUNCC(pred4x4_top_dc)(src, NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_0lt: runs the full 8x16 DC predictor on
 * the block, then the 4x4 top-DC predictor at src.
 * The order of the two calls is kept as is; the second call targets the same
 * start address as the first (presumably overwriting its first 4x4 block).
 */
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride)
{
    FUNCC(pred8x16_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_l00: runs the 8x16 left-DC predictor on
 * the block, then the 4x4 "128" DC predictor on the two 4x4 blocks that start
 * four rows below src (at byte offsets 0 and 4*sizeof(pixel)).
 */
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride)
{
    uint8_t *const row4 = src + 4*stride;

    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(row4, NULL, stride);
    FUNCC(pred4x4_128_dc)(row4 + 4*sizeof(pixel), NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride); FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_0l0: runs the 8x16 left-DC predictor on
 * the block, then the 4x4 "128" DC predictor on the two 4x4 blocks that start
 * on the first row (at byte offsets 0 and 4*sizeof(pixel) from src).
 */
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride)
{
    uint8_t *const right_half = src + 4*sizeof(pixel);

    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src, NULL, stride);
    FUNCC(pred4x4_128_dc)(right_half, NULL, stride);
}
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
int j, k; int j, k;
int a; int a;
...@@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c ...@@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
/**
 * Apply pred4x4_vertical_add to the eight 4x4 blocks of an 8x16 block.
 *
 * Coefficient block n (n = 0..7) starts at block + n*16*sizeof(pixel).
 * Blocks 0..3 are placed at block_offset[0..3], blocks 4..7 at
 * block_offset[8..11].
 */
static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride)
{
    int n;

    for (n = 0; n < 8; n++) {
        /* the second group of four blocks uses offsets shifted by 4 entries */
        const int off_idx = n < 4 ? n : n + 4;

        FUNCC(pred4x4_vertical_add)(pix + block_offset[off_idx],
                                    block + n*16*sizeof(pixel), stride);
    }
}
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
/**
 * Apply pred4x4_horizontal_add to the eight 4x4 blocks of an 8x16 block.
 *
 * Coefficient block n (n = 0..7) starts at block + n*16*sizeof(pixel).
 * Blocks 0..3 are placed at block_offset[0..3], blocks 4..7 at
 * block_offset[8..11].
 */
static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride)
{
    int n;

    for (n = 0; n < 8; n++) {
        /* the second group of four blocks uses offsets shifted by 4 entries */
        const int off_idx = n < 4 ? n : n + 4;

        FUNCC(pred4x4_horizontal_add)(pix + block_offset[off_idx],
                                      block + n*16*sizeof(pixel), stride);
    }
}
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "avcodec.h"
#include <speex/speex.h> #include <speex/speex.h>
#include <speex/speex_header.h> #include <speex/speex_header.h>
#include <speex/speex_stereo.h> #include <speex/speex_stereo.h>
#include <speex/speex_callbacks.h> #include <speex/speex_callbacks.h>
#include "avcodec.h"
typedef struct { typedef struct {
SpeexBits bits; SpeexBits bits;
...@@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx) ...@@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
mode = speex_lib_get_mode(s->header->mode); mode = speex_lib_get_mode(s->header->mode);
if (!mode) { if (!mode) {
av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode); av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode);
return -1; return AVERROR_INVALIDDATA;
} }
} else } else
av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n"); av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n");
if (avctx->channels > 2) { if (avctx->channels > 2) {
av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n"); av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n");
return -1; return AVERROR(EINVAL);
} }
speex_bits_init(&s->bits); speex_bits_init(&s->bits);
...@@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx, ...@@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx,
uint8_t *buf = avpkt->data; uint8_t *buf = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
LibSpeexContext *s = avctx->priv_data; LibSpeexContext *s = avctx->priv_data;
int16_t *output = data, *end; int16_t *output = data;
int i, num_samples; int out_size, ret, consumed = 0;
num_samples = s->frame_size * avctx->channels; /* check output buffer size */
end = output + *data_size / sizeof(*output); out_size = s->frame_size * avctx->channels *
av_get_bytes_per_sample(avctx->sample_fmt);
speex_bits_read_from(&s->bits, buf, buf_size); if (*data_size < out_size) {
av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
for (i = 0; speex_bits_remaining(&s->bits) && output + num_samples < end; i++) { return AVERROR(EINVAL);
int ret = speex_decode_int(s->dec_state, &s->bits, output); }
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return -1;
} else if (ret == -1)
// end of stream
break;
if (avctx->channels == 2) /* if there is not enough data left for the smallest possible frame,
speex_decode_stereo_int(output, s->frame_size, &s->stereo); reset the libspeex buffer using the current packet, otherwise ignore
the current packet and keep decoding frames from the libspeex buffer. */
if (speex_bits_remaining(&s->bits) < 43) {
/* check for flush packet */
if (!buf || !buf_size) {
*data_size = 0;
return buf_size;
}
/* set new buffer */
speex_bits_read_from(&s->bits, buf, buf_size);
consumed = buf_size;
}
output += num_samples; /* decode a single frame */
ret = speex_decode_int(s->dec_state, &s->bits, output);
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return AVERROR_INVALIDDATA;
} }
if (avctx->channels == 2)
speex_decode_stereo_int(output, s->frame_size, &s->stereo);
avctx->frame_size = s->frame_size * i; *data_size = out_size;
*data_size = avctx->channels * avctx->frame_size * sizeof(*output); return consumed;
return buf_size;
} }
static av_cold int libspeex_decode_close(AVCodecContext *avctx) static av_cold int libspeex_decode_close(AVCodecContext *avctx)
...@@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx) ...@@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx)
return 0; return 0;
} }
/**
 * Flush callback of the libspeex decoder: resets the SpeexBits buffer held in
 * the private context.
 */
static av_cold void libspeex_decode_flush(AVCodecContext *avctx)
{
    LibSpeexContext *ctx = avctx->priv_data;

    speex_bits_reset(&ctx->bits);
}
AVCodec ff_libspeex_decoder = { AVCodec ff_libspeex_decoder = {
.name = "libspeex", .name = "libspeex",
.type = AVMEDIA_TYPE_AUDIO, .type = AVMEDIA_TYPE_AUDIO,
...@@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = { ...@@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = {
.init = libspeex_decode_init, .init = libspeex_decode_init,
.close = libspeex_decode_close, .close = libspeex_decode_close,
.decode = libspeex_decode_frame, .decode = libspeex_decode_frame,
.flush = libspeex_decode_flush,
.capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY,
.long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"), .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
}; };
...@@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext { ...@@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext {
int syncword; ///< syncword patch int syncword; ///< syncword patch
const uint8_t *coff; ///< channels offsets in output buffer const uint8_t *coff; ///< channels offsets in output buffer
MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
OUT_INT *decoded_buf; ///< output buffer for decoded samples
} MP3On4DecodeContext; } MP3On4DecodeContext;
#include "mpeg4audio.h" #include "mpeg4audio.h"
/* Next 3 arrays are indexed by channel config number (passed via codecdata) */ /* Next 3 arrays are indexed by channel config number (passed via codecdata) */
static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */ static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */
/* offsets into output buffer, assume output order is FL FR BL BR C LFE */ /* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
static const uint8_t chan_offset[8][5] = { static const uint8_t chan_offset[8][5] = {
{0}, {0},
{0}, // C {0}, // C
{0}, // FLR {0}, // FLR
{2,0}, // C FLR {2,0}, // C FLR
{2,0,3}, // C FLR BS {2,0,3}, // C FLR BS
{4,0,2}, // C FLR BLRS {2,0,3}, // C FLR BLRS
{4,0,2,5}, // C FLR BLRS LFE {2,0,4,3}, // C FLR BLRS LFE
{4,0,2,6,5}, // C FLR BLRS BLR LFE {2,0,6,4,3}, // C FLR BLRS BLR LFE
}; };
/*
 * mp3on4 channel layouts, indexed by channel config number
 * (entry 0 is 0, i.e. no layout)
 */
static const int16_t chan_layout[8] = {
    [0] = 0,
    [1] = AV_CH_LAYOUT_MONO,
    [2] = AV_CH_LAYOUT_STEREO,
    [3] = AV_CH_LAYOUT_SURROUND,
    [4] = AV_CH_LAYOUT_4POINT0,
    [5] = AV_CH_LAYOUT_5POINT0,
    [6] = AV_CH_LAYOUT_5POINT1,
    [7] = AV_CH_LAYOUT_7POINT1,
};
/**
 * Release everything owned by the MP3onMP4 decoder context.
 *
 * Frees the first s->frames per-stream MPADecodeContext instances and the
 * decoded-sample buffer. This function is both the codec close callback and
 * the cleanup path decode_init_mp3on4() jumps to on allocation failure, so
 * every pointer is released with av_freep(), which leaves it NULL: calling
 * the function again cannot free the same memory twice.
 *
 * @param avctx codec context whose priv_data is the MP3On4DecodeContext
 * @return always 0
 */
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i;

    for (i = 0; i < s->frames; i++)
        av_freep(&s->mp3decctx[i]);
    av_freep(&s->decoded_buf);

    return 0;
}
static int decode_init_mp3on4(AVCodecContext * avctx) static int decode_init_mp3on4(AVCodecContext * avctx)
{ {
...@@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx) ...@@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
s->frames = mp3Frames[cfg.chan_config]; s->frames = mp3Frames[cfg.chan_config];
s->coff = chan_offset[cfg.chan_config]; s->coff = chan_offset[cfg.chan_config];
avctx->channels = ff_mpeg4audio_channels[cfg.chan_config]; avctx->channels = ff_mpeg4audio_channels[cfg.chan_config];
avctx->channel_layout = chan_layout[cfg.chan_config];
if (cfg.sample_rate < 16000) if (cfg.sample_rate < 16000)
s->syncword = 0xffe00000; s->syncword = 0xffe00000;
...@@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx) ...@@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/ */
// Allocate zeroed memory for the first decoder context // Allocate zeroed memory for the first decoder context
s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext)); s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[0])
goto alloc_fail;
// Put decoder context in place to make init_decode() happy // Put decoder context in place to make init_decode() happy
avctx->priv_data = s->mp3decctx[0]; avctx->priv_data = s->mp3decctx[0];
decode_init(avctx); decode_init(avctx);
...@@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx) ...@@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/ */
for (i = 1; i < s->frames; i++) { for (i = 1; i < s->frames; i++) {
s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[i])
goto alloc_fail;
s->mp3decctx[i]->adu_mode = 1; s->mp3decctx[i]->adu_mode = 1;
s->mp3decctx[i]->avctx = avctx; s->mp3decctx[i]->avctx = avctx;
s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
}
/* Allocate buffer for multi-channel output if needed */
if (s->frames > 1) {
s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS *
sizeof(*s->decoded_buf));
if (!s->decoded_buf)
goto alloc_fail;
} }
return 0; return 0;
alloc_fail:
decode_close_mp3on4(avctx);
return AVERROR(ENOMEM);
} }
static av_cold int decode_close_mp3on4(AVCodecContext * avctx) static void flush_mp3on4(AVCodecContext *avctx)
{ {
MP3On4DecodeContext *s = avctx->priv_data;
int i; int i;
MP3On4DecodeContext *s = avctx->priv_data;
for (i = 0; i < s->frames; i++) for (i = 0; i < s->frames; i++) {
av_free(s->mp3decctx[i]); MPADecodeContext *m = s->mp3decctx[i];
memset(m->synth_buf, 0, sizeof(m->synth_buf));
return 0; m->last_buf_size = 0;
}
} }
...@@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, ...@@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
int fsize, len = buf_size, out_size = 0; int fsize, len = buf_size, out_size = 0;
uint32_t header; uint32_t header;
OUT_INT *out_samples = data; OUT_INT *out_samples = data;
OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
OUT_INT *outptr, *bp; OUT_INT *outptr, *bp;
int fr, j, n; int fr, j, n, ch;
if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT)) if (*data_size < MPA_FRAME_SIZE * avctx->channels * sizeof(OUT_INT)) {
return -1; av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
return AVERROR(EINVAL);
}
*data_size = 0; *data_size = 0;
// Discard too short frames // Discard too short frames
...@@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, ...@@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
return -1; return -1;
// If only one decoder interleave is not needed // If only one decoder interleave is not needed
outptr = s->frames == 1 ? out_samples : decoded_buf; outptr = s->frames == 1 ? out_samples : s->decoded_buf;
avctx->bit_rate = 0; avctx->bit_rate = 0;
ch = 0;
for (fr = 0; fr < s->frames; fr++) { for (fr = 0; fr < s->frames; fr++) {
fsize = AV_RB16(buf) >> 4; fsize = AV_RB16(buf) >> 4;
fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE); fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
...@@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, ...@@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
break; break;
avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header); avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
if (ch + m->nb_channels > avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
"channel count\n");
return AVERROR_INVALIDDATA;
}
ch += m->nb_channels;
out_size += mp_decode_frame(m, outptr, buf, fsize); out_size += mp_decode_frame(m, outptr, buf, fsize);
buf += fsize; buf += fsize;
len -= fsize; len -= fsize;
...@@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx, ...@@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
bp = out_samples + s->coff[fr]; bp = out_samples + s->coff[fr];
if(m->nb_channels == 1) { if(m->nb_channels == 1) {
for(j = 0; j < n; j++) { for(j = 0; j < n; j++) {
*bp = decoded_buf[j]; *bp = s->decoded_buf[j];
bp += avctx->channels; bp += avctx->channels;
} }
} else { } else {
for(j = 0; j < n; j++) { for(j = 0; j < n; j++) {
bp[0] = decoded_buf[j++]; bp[0] = s->decoded_buf[j++];
bp[1] = decoded_buf[j]; bp[1] = s->decoded_buf[j];
bp += avctx->channels; bp += avctx->channels;
} }
} }
...@@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = { ...@@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = {
.init = decode_init_mp3on4, .init = decode_init_mp3on4,
.close = decode_close_mp3on4, .close = decode_close_mp3on4,
.decode = decode_frame_mp3on4, .decode = decode_frame_mp3on4,
.flush = flush, .flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
}; };
#endif #endif
......
...@@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = { ...@@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = {
.init = decode_init_mp3on4, .init = decode_init_mp3on4,
.close = decode_close_mp3on4, .close = decode_close_mp3on4,
.decode = decode_frame_mp3on4, .decode = decode_frame_mp3on4,
.flush = flush, .flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
}; };
#endif #endif
...@@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, ...@@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
} }
static av_always_inline static av_always_inline
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h) void weight_h264_W_altivec(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset, int w)
{ {
int y, aligned; int y, aligned;
vec_u8 vblock; vec_u8 vblock;
...@@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei ...@@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
voffset = vec_splat(vtemp, 5); voffset = vec_splat(vtemp, 5);
aligned = !((unsigned long)block & 0xf); aligned = !((unsigned long)block & 0xf);
for (y=0; y<h; y++) { for (y = 0; y < height; y++) {
vblock = vec_ld(0, block); vblock = vec_ld(0, block);
v0 = (vec_s16)vec_mergeh(zero_u8v, vblock); v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
...@@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei ...@@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
} }
static av_always_inline static av_always_inline
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
int weightd, int weights, int offset, int w, int h) int log2_denom, int weightd, int weights, int offset, int w)
{ {
int y, dst_aligned, src_aligned; int y, dst_aligned, src_aligned;
vec_u8 vsrc, vdst; vec_u8 vsrc, vdst;
...@@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ ...@@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
dst_aligned = !((unsigned long)dst & 0xf); dst_aligned = !((unsigned long)dst & 0xf);
src_aligned = !((unsigned long)src & 0xf); src_aligned = !((unsigned long)src & 0xf);
for (y=0; y<h; y++) { for (y = 0; y < height; y++) {
vdst = vec_ld(0, dst); vdst = vec_ld(0, dst);
vsrc = vec_ld(0, src); vsrc = vec_ld(0, src);
...@@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ ...@@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
} }
} }
#define H264_WEIGHT(W,H) \ #define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \ int log2_denom, int weight, int offset){ \
weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\ }\
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \ int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
} }
H264_WEIGHT(16,16) H264_WEIGHT(16)
H264_WEIGHT(16, 8) H264_WEIGHT( 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
...@@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom ...@@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
} }
} }
} }
...@@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l ...@@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:
......
...@@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s) ...@@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s)
av_freep(&s->top_nnz); av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer); av_freep(&s->edge_emu_buffer);
av_freep(&s->top_border); av_freep(&s->top_border);
av_freep(&s->segmentation_map);
s->macroblocks = NULL; s->macroblocks = NULL;
} }
static void vp8_decode_flush(AVCodecContext *avctx) static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
int ret;
if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
return ret;
if (!s->maps_are_invalid && s->num_maps_to_be_freed) {
f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
} else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
ff_thread_release_buffer(s->avctx, f);
return AVERROR(ENOMEM);
}
return 0;
}
/**
 * Release a frame buffer and dispose of the map stored in f->ref_index[0].
 *
 * @param s        decoder context
 * @param f        frame to release
 * @param is_close non-zero: free the map right away;
 *                 zero: move a non-NULL map into s->segmentation_maps so it
 *                 is freed (or reused) later instead of immediately
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
{
    if (is_close) {
        av_freep(&f->ref_index[0]);
    } else if (f->ref_index[0]) {
        /* the deferred-free queue has a fixed capacity */
        assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
        s->segmentation_maps[s->num_maps_to_be_freed] = f->ref_index[0];
        s->num_maps_to_be_freed++;
        f->ref_index[0] = NULL;
    }

    ff_thread_release_buffer(s->avctx, f);
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
{ {
VP8Context *s = avctx->priv_data; VP8Context *s = avctx->priv_data;
int i; int i;
if (!avctx->is_copy) { if (!avctx->is_copy || force) {
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
if (s->frames[i].data[0]) if (s->frames[i].data[0])
ff_thread_release_buffer(avctx, &s->frames[i]); vp8_release_frame(s, &s->frames[i], is_close);
} }
memset(s->framep, 0, sizeof(s->framep)); memset(s->framep, 0, sizeof(s->framep));
free_buffers(s); free_buffers(s);
s->maps_are_invalid = 1;
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
vp8_decode_flush_impl(avctx, 0, 0);
} }
static int update_dimensions(VP8Context *s, int width, int height) static int update_dimensions(VP8Context *s, int width, int height)
...@@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height) ...@@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
if (av_image_check_size(width, height, 0, s->avctx)) if (av_image_check_size(width, height, 0, s->avctx))
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
vp8_decode_flush(s->avctx); vp8_decode_flush_impl(s->avctx, 1, 0);
avcodec_set_dimensions(s->avctx, width, height); avcodec_set_dimensions(s->avctx, width, height);
} }
...@@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height) ...@@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
!s->top_nnz || !s->top_border || !s->segmentation_map) !s->top_nnz || !s->top_border)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1; s->macroblocks = s->macroblocks_base + 1;
...@@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y) ...@@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
} }
} }
/**
 * Free the maps queued in s->segmentation_maps.
 *
 * When not closing and the queued maps are still valid, one map is left in
 * the queue; otherwise the queue is emptied. s->maps_are_invalid is cleared
 * in every case.
 *
 * @param s        decoder context
 * @param is_close non-zero to free every queued map
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    if (!is_close)
        keep = !s->maps_are_invalid;

    for (; s->num_maps_to_be_freed > keep; ) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) AVPacket *avpkt)
{ {
...@@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
enum AVDiscard skip_thresh; enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT]; AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret; return ret;
...@@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
ff_thread_release_buffer(avctx, &s->frames[i]); vp8_release_frame(s, &s->frames[i], 0);
// find a free buffer // find a free buffer
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
...@@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
curframe->key_frame = s->keyframe; curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0; curframe->reference = referenced ? 3 : 0;
curframe->ref_index[0] = s->segmentation_map; if ((ret = vp8_alloc_frame(s, curframe))) {
if ((ret = ff_thread_get_buffer(avctx, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
return ret; return ret;
} }
...@@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy, decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL); prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
...@@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) ...@@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
static av_cold int vp8_decode_free(AVCodecContext *avctx) static av_cold int vp8_decode_free(AVCodecContext *avctx)
{ {
vp8_decode_flush(avctx); vp8_decode_flush_impl(avctx, 0, 1);
release_queued_segmaps(avctx->priv_data, 1);
return 0; return 0;
} }
......
...@@ -130,7 +130,6 @@ typedef struct { ...@@ -130,7 +130,6 @@ typedef struct {
uint8_t *intra4x4_pred_mode_top; uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4]; uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/** /**
* Macroblocks can have one of 4 different quants in a frame when * Macroblocks can have one of 4 different quants in a frame when
...@@ -237,6 +236,16 @@ typedef struct { ...@@ -237,6 +236,16 @@ typedef struct {
H264PredContext hpc; H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3]; vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[5]; AVFrame frames[5];
/**
* A list of segmentation_map buffers that are to be free()'ed in
* the next decoding iteration. We can't free() them right away
* because the map may still be used by subsequent decoding threads.
* Unused if frame threading is off.
*/
uint8_t *segmentation_maps[5];
int num_maps_to_be_freed;
int maps_are_invalid;
} VP8Context; } VP8Context;
#endif /* AVCODEC_VP8_H */ #endif /* AVCODEC_VP8_H */
...@@ -1055,14 +1055,6 @@ emu_edge mmx ...@@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len) ; int32_t max, unsigned int len)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro VECTOR_CLIP_INT32 4 %macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2 %ifidn %1, sse2
......
...@@ -24,6 +24,146 @@ ...@@ -24,6 +24,146 @@
SECTION_TEXT SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
; INT32_TO_FLOAT_FMUL_SCALAR <suffix>, <cglobal register-count argument>
;   %1: suffix appended to the function name; also selects the code path
;       (the "sse2" path uses cvtdq2ps, any other suffix uses cvtpi2ps)
;   %2: forwarded unchanged as the third numeric argument of cglobal
; Emits a function that converts int32 samples from src to float,
; multiplies each by the scalar mul and stores the result to dst.
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%ifdef ARCH_X86_64
; NOTE(review): mul is not declared or loaded on this path; presumably it
; already arrives in m0 under the calling convention -- confirm for all
; 64-bit ABIs in use.
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
%else
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
; load the scalar multiplier from its argument slot
movss m0, mulm
%endif
; replicate the multiplier across m0 (SPLATD is %define'd per variant below)
SPLATD m0
; len *= 4: element count -> byte count (4 bytes per int32/float)
shl lenq, 2
; point src/dst at the end of the buffers and walk a negative byte offset
; up towards zero
add srcq, lenq
add dstq, lenq
neg lenq
.loop:
%ifidn %1, sse2
; 4 int32 -> 4 float per instruction, 8 elements per iteration
cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16]
%else
; cvtpi2ps converts 2 int32 at a time; movlhps merges two such pairs into
; one register of 4 floats
cvtpi2ps m1, [srcq+lenq ]
cvtpi2ps m3, [srcq+lenq+ 8]
cvtpi2ps m2, [srcq+lenq+16]
cvtpi2ps m4, [srcq+lenq+24]
movlhps m1, m3
movlhps m2, m4
%endif
mulps m1, m0
mulps m2, m0
mova [dstq+lenq ], m1
mova [dstq+lenq+16], m2
; advance by 32 bytes (8 elements); loop while the offset is still negative
; NOTE(review): no tail handling -- presumably callers guarantee len is a
; multiple of 8; confirm.
add lenq, 32
jl .loop
REP_RET
%endmacro

; Instantiate the sse and sse2 variants; SPLATD is bound to the matching
; splat macro for each.
INIT_XMM
%define SPLATD SPLATD_SSE
; NOTE(review): movdqa is temporarily aliased to movaps for the sse variant,
; presumably so the mova stores do not assemble to an SSE2 instruction --
; confirm against x86inc.
%define movdqa movaps
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
%undef movdqa
%define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; FLOAT_TO_INT16 <suffix>, <cglobal register-count argument>
;   %1: suffix appended to the function name; "sse2" selects the cvtps2dq
;       path, any other suffix the cvtps2pi path
;   %2: forwarded unchanged as the third numeric argument of cglobal
; Emits a function that converts floats from src to int32 and packs them
; with signed saturation (packssdw) into int16 values stored to dst.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
; len *= 2: sample count -> dst byte count (2 bytes per int16)
add lenq, lenq
; src advances by 2*lenq bytes (4 bytes per float), dst by lenq bytes;
; both then point at the buffer ends and lenq becomes a negative offset
lea srcq, [srcq+2*lenq]
add dstq, lenq
neg lenq
.loop:
%ifidn %1, sse2
; 8 floats -> 8 int32 -> 8 saturated int16, one 16-byte store
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
mova [dstq+lenq], m0
%else
; 2 floats per conversion; four conversions and two packs give the same
; 8 int16 per iteration, written as two 8-byte stores
cvtps2pi m0, [srcq+2*lenq ]
cvtps2pi m1, [srcq+2*lenq+ 8]
cvtps2pi m2, [srcq+2*lenq+16]
cvtps2pi m3, [srcq+2*lenq+24]
packssdw m0, m1
packssdw m2, m3
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m2
%endif
; 16 dst bytes (8 samples) per iteration; loop while the offset is negative
; NOTE(review): no tail handling -- presumably len is a multiple of 8; confirm.
add lenq, 16
js .loop
%ifnidn %1, sse2
; the non-sse2 variants are instantiated under INIT_MMX below, so the MMX
; state is cleared before returning
emms
%endif
REP_RET
%endmacro

INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
; the 3dnow variant reuses the cvtps2pi path with pf2id substituted for the
; conversion instruction
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
; FLOAT_TO_INT16_INTERLEAVE2 <suffix>
;   %1: suffix appended to the function name; "sse2" selects the cvtps2dq
;       path, any other suffix the cvtps2pi path
; Emits a function that reads two float channels through the pointer array
; src, converts them to saturated int16 and stores them interleaved
; (ch0, ch1, ch0, ch1, ...) to dst.
%macro FLOAT_TO_INT16_INTERLEAVE2 1
; 3 arguments are loaded but 4 GPRs are named: the third argument register
; (r2) is named src1 and the fourth register (r3) is named len
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
; lenq = 4 * third argument (read via r2q before that register is reused as
; src1q): 4 bytes per float in each source and 2 channels * 2 bytes per
; output sample pair
lea lenq, [4*r2q]
; fetch the two channel pointers from the array; the second entry is read
; first because the second load overwrites the array pointer in src0q
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
; point all three buffers at their ends and walk a negative byte offset
add dstq, lenq
add src0q, lenq
add src1q, lenq
neg lenq
.loop:
%ifidn %1, sse2
; 4 samples per channel: pack gives a0..a3 b0..b3, movhlps brings b0..b3
; into the low half of m1, punpcklwd interleaves to a0 b0 a1 b1 ...
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
packssdw m0, m1
movhlps m1, m0
punpcklwd m0, m1
mova [dstq+lenq], m0
%else
; 2 floats per conversion; m0 holds 4 samples of channel 0 and m2 4 samples
; of channel 1 after packing, then low/high word interleaves produce the
; two 8-byte output halves
cvtps2pi m0, [src0q+lenq ]
cvtps2pi m1, [src0q+lenq+8]
cvtps2pi m2, [src1q+lenq ]
cvtps2pi m3, [src1q+lenq+8]
packssdw m0, m1
packssdw m2, m3
mova m1, m0
punpcklwd m0, m2
punpckhwd m1, m2
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m1
%endif
; 16 bytes (4 samples per channel) per iteration; loop while offset negative
; NOTE(review): no tail handling -- presumably len is a multiple of 4; confirm.
add lenq, 16
js .loop
%ifnidn %1, sse2
; the non-sse2 variants are instantiated under INIT_MMX below, so the MMX
; state is cleared before returning
emms
%endif
REP_RET
%endmacro

INIT_MMX
; the 3dnow variant reuses the cvtps2pi path with pf2id substituted for the
; conversion instruction
%define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow
%undef cvtps2pi
%define movdqa movaps
FLOAT_TO_INT16_INTERLEAVE2 sse
%undef movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2 %macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e pshufw %1, %2, 0x4e
%endmacro %endmacro
......
This diff is collapsed.
...@@ -28,21 +28,20 @@ SECTION .text ...@@ -28,21 +28,20 @@ SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; biweight pred: ; biweight pred:
; ;
; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride, ; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int log2_denom, int weightd, int weights, ; int height, int log2_denom, int weightd,
; int offset); ; int weights, int offset);
; and ; and
; void h264_weight_16x16_sse2(uint8_t *dst, int stride, ; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
; int log2_denom, int weight, ; int log2_denom, int weight, int offset);
; int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro WEIGHT_SETUP 0 %macro WEIGHT_SETUP 0
add r4, r4 add r5, r5
inc r4 inc r5
movd m3, r3d movd m3, r4d
movd m5, r4d movd m5, r5d
movd m6, r2d movd m6, r3d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
...@@ -71,60 +70,41 @@ SECTION .text ...@@ -71,60 +70,41 @@ SECTION .text
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
%macro WEIGHT_FUNC_DBL_MM 1 INIT_MMX
cglobal h264_weight_16x%1_mmx2, 5, 5, 0 cglobal h264_weight_16_mmx2, 6, 6, 0
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %1
%if %1 == 16
.nextrow .nextrow
WEIGHT_OP 0, 4 WEIGHT_OP 0, 4
mova [r0 ], m0 mova [r0 ], m0
WEIGHT_OP 8, 12 WEIGHT_OP 8, 12
mova [r0+8], m0 mova [r0+8], m0
add r0, r1 add r0, r1
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX %macro WEIGHT_FUNC_MM 3
WEIGHT_FUNC_DBL_MM 16 cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_FUNC_DBL_MM 8
%macro WEIGHT_FUNC_MM 4
cglobal h264_weight_%1x%2_%4, 7, 7, %3
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2
%if %2 == 16
.nextrow .nextrow
WEIGHT_OP 0, mmsize/2 WEIGHT_OP 0, mmsize/2
mova [r0], m0 mova [r0], m0
add r0, r1 add r0, r1
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
WEIGHT_FUNC_MM 8, 16, 0, mmx2 WEIGHT_FUNC_MM 8, 0, mmx2
WEIGHT_FUNC_MM 8, 8, 0, mmx2
WEIGHT_FUNC_MM 8, 4, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_MM 16, 16, 8, sse2 WEIGHT_FUNC_MM 16, 8, sse2
WEIGHT_FUNC_MM 16, 8, 8, sse2
%macro WEIGHT_FUNC_HALF_MM 5 %macro WEIGHT_FUNC_HALF_MM 3
cglobal h264_weight_%1x%2_%5, 5, 5, %4 cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2/2 sar r2d, 1
lea r3, [r1*2] lea r3, [r1*2]
%if %2 == mmsize
.nextrow .nextrow
WEIGHT_OP 0, r1 WEIGHT_OP 0, r1
movh [r0], m0 movh [r0], m0
...@@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4 ...@@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4
movh [r0+r1], m0 movh [r0+r1], m0
%endif %endif
add r0, r3 add r0, r3
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SETUP 0 %macro BIWEIGHT_SETUP 0
add r6, 1 %ifdef ARCH_X86_64
or r6, 1 %define off_regd r11d
add r3, 1 %else
movd m3, r4d %define off_regd r3d
movd m4, r5d %endif
movd m5, r6d mov off_regd, r7m
movd m6, r3d add off_regd, 1
or off_regd, 1
add r4, 1
movd m3, r5d
movd m4, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
...@@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 ...@@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
%macro BIWEIGHT_FUNC_DBL_MM 1 INIT_MMX
cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 cglobal h264_biweight_16_mmx2, 7, 7, 0
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %1 movifnidn r3d, r3m
%if %1 == 16
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, 4 BIWEIGHT_STEPA 1, 2, 4
...@@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 ...@@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
mova [r0+8], m0 mova [r0+8], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX %macro BIWEIGHT_FUNC_MM 3
BIWEIGHT_FUNC_DBL_MM 16 cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_FUNC_DBL_MM 8
%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2 movifnidn r3d, r3m
%if %2 == 16
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, mmsize/2 BIWEIGHT_STEPA 1, 2, mmsize/2
...@@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3 ...@@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
BIWEIGHT_FUNC_MM 8, 16, 0, mmx2 BIWEIGHT_FUNC_MM 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_MM 16, 16, 8, sse2 BIWEIGHT_FUNC_MM 16, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, 8, sse2
%macro BIWEIGHT_FUNC_HALF_MM 5 %macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1x%2_%5, 7, 7, %4 cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2/2 movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]
%if %2 == mmsize
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, r2 BIWEIGHT_STEPA 1, 2, r2
...@@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4 ...@@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%endif %endif
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
%macro BIWEIGHT_SSSE3_SETUP 0 %macro BIWEIGHT_SSSE3_SETUP 0
add r6, 1 %ifdef ARCH_X86_64
or r6, 1 %define off_regd r11d
add r3, 1 %else
movd m4, r4d %define off_regd r3d
movd m0, r5d %endif
movd m5, r6d mov off_regd, r7m
movd m6, r3d add off_regd, 1
or off_regd, 1
add r4, 1
movd m4, r5d
movd m0, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
punpcklbw m4, m0 punpcklbw m4, m0
...@@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 ...@@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m2 packuswb m0, m2
%endmacro %endmacro
%macro BIWEIGHT_SSSE3_16 1 INIT_XMM
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8 cglobal h264_biweight_16_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1 movifnidn r3d, r3m
%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m2, [r0+8] movh m2, [r0+8]
...@@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8 ...@@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM INIT_XMM
BIWEIGHT_SSSE3_16 16 cglobal h264_biweight_8_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_16 8
%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1/2 movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]
%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m1, [r1] movh m1, [r1]
...@@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8 ...@@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
movhps [r0+r2], m0 movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8 8
BIWEIGHT_SSSE3_8 4
...@@ -36,33 +36,26 @@ cextern pw_1 ...@@ -36,33 +36,26 @@ cextern pw_1
SECTION .text SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_weight(uint8_t *dst, int stride, int log2_denom, ; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
; int weight, int offset); ; int weight, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32 %macro WEIGHT_PROLOGUE 0
DECLARE_REG_TMP 2
%else
DECLARE_REG_TMP 10
%endif
%macro WEIGHT_PROLOGUE 1
mov t0, %1
.prologue .prologue
PROLOGUE 0,5,8 PROLOGUE 0,6,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1d, r1m movifnidn r1d, r1m
movifnidn r3d, r3m
movifnidn r4d, r4m movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro %endmacro
%macro WEIGHT_SETUP 1 %macro WEIGHT_SETUP 1
mova m0, [pw_1] mova m0, [pw_1]
movd m2, r2m movd m2, r3m
pslld m0, m2 ; 1<<log2_denom pslld m0, m2 ; 1<<log2_denom
SPLATW m0, m0 SPLATW m0, m0
shl r4, 19 ; *8, move to upper half of dword shl r5, 19 ; *8, move to upper half of dword
lea r4, [r4+r3*2+0x10000] lea r5, [r5+r4*2+0x10000]
movd m3, r4d ; weight<<1 | 1+(offset<<(3)) movd m3, r5d ; weight<<1 | 1+(offset<<(3))
pshufd m3, m3, 0 pshufd m3, m3, 0
mova m4, [pw_pixel_max] mova m4, [pw_pixel_max]
paddw m2, [sq_1] ; log2_denom+1 paddw m2, [sq_1] ; log2_denom+1
...@@ -96,8 +89,8 @@ DECLARE_REG_TMP 10 ...@@ -96,8 +89,8 @@ DECLARE_REG_TMP 10
%endmacro %endmacro
%macro WEIGHT_FUNC_DBL 1 %macro WEIGHT_FUNC_DBL 1
cglobal h264_weight_16x16_10_%1 cglobal h264_weight_16_10_%1
WEIGHT_PROLOGUE 16 WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
...@@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1 ...@@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1
WEIGHT_OP %1, 16 WEIGHT_OP %1, 16
mova [r0+16], m5 mova [r0+16], m5
add r0, r1 add r0, r1
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_16x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
...@@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4 ...@@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4
%macro WEIGHT_FUNC_MM 1 %macro WEIGHT_FUNC_MM 1
cglobal h264_weight_8x16_10_%1 cglobal h264_weight_8_10_%1
WEIGHT_PROLOGUE 16 WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
mova [r0], m5 mova [r0], m5
add r0, r1 add r0, r1
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
cglobal h264_weight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
...@@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4 ...@@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4
%macro WEIGHT_FUNC_HALF_MM 1 %macro WEIGHT_FUNC_HALF_MM 1
cglobal h264_weight_4x8_10_%1 cglobal h264_weight_4_10_%1
WEIGHT_PROLOGUE 4 WEIGHT_PROLOGUE
sar r2d, 1
WEIGHT_SETUP %1 WEIGHT_SETUP %1
lea r3, [r1*2] lea r3, [r1*2]
.nextrow .nextrow
...@@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1 ...@@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1
movh [r0], m5 movh [r0], m5
movhps [r0+r1], m5 movhps [r0+r1], m5
add r0, r3 add r0, r3
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
cglobal h264_weight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
...@@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4 ...@@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom, ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int weightd, int weights, int offset); ; int log2_denom, int weightd, int weights, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32 %ifdef ARCH_X86_32
DECLARE_REG_TMP 2,3 DECLARE_REG_TMP 3
%else %else
DECLARE_REG_TMP 10,2 DECLARE_REG_TMP 10
%endif %endif
%macro BIWEIGHT_PROLOGUE 1 %macro BIWEIGHT_PROLOGUE 0
mov t0, %1
.prologue .prologue
PROLOGUE 0,7,8 PROLOGUE 0,7,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1, r1mp movifnidn r1, r1mp
movifnidn t1d, r2m movifnidn r2d, r2m
movifnidn r4d, r4m
movifnidn r5d, r5m movifnidn r5d, r5m
movifnidn r6d, r6m movifnidn r6d, r6m
movifnidn t0d, r7m
%endmacro %endmacro
%macro BIWEIGHT_SETUP 1 %macro BIWEIGHT_SETUP 1
lea r6, [r6*4+1] ; (offset<<2)+1 lea t0, [t0*4+1] ; (offset<<2)+1
or r6, 1 or t0, 1
shl r5, 16 shl r6, 16
or r4, r5 or r5, r6
movd m4, r4d ; weightd | weights movd m4, r5d ; weightd | weights
movd m5, r6d ; (offset+1)|1 movd m5, t0d ; (offset+1)|1
movd m6, r3m ; log2_denom movd m6, r4m ; log2_denom
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
paddd m6, [sq_1] paddd m6, [sq_1]
pshufd m4, m4, 0 pshufd m4, m4, 0
pshufd m5, m5, 0 pshufd m5, m5, 0
mova m3, [pw_pixel_max] mova m3, [pw_pixel_max]
movifnidn r3d, r3m
%ifnidn %1, sse4 %ifnidn %1, sse4
pxor m7, m7 pxor m7, m7
%endif %endif
...@@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2 ...@@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2
%endmacro %endmacro
%macro BIWEIGHT_FUNC_DBL 1 %macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16x16_10_%1 cglobal h264_biweight_16_10_%1
BIWEIGHT_PROLOGUE 16 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0 ], m0 mova [r0 ], m0
BIWEIGHT %1, 16 BIWEIGHT %1, 16
mova [r0+16], m0 mova [r0+16], m0
add r0, t1 add r0, r2
add r1, t1 add r1, r2
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
...@@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2 ...@@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4 BIWEIGHT_FUNC_DBL sse4
%macro BIWEIGHT_FUNC 1 %macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8x16_10_%1 cglobal h264_biweight_8_10_%1
BIWEIGHT_PROLOGUE 16 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0], m0 mova [r0], m0
add r0, t1 add r0, r2
add r1, t1 add r1, r2
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
cglobal h264_biweight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
...@@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2 ...@@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4 BIWEIGHT_FUNC sse4
%macro BIWEIGHT_FUNC_HALF 1 %macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4x8_10_%1 cglobal h264_biweight_4_10_%1
BIWEIGHT_PROLOGUE 4 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
lea r4, [t1*2] sar r3d, 1
lea r4, [r2*2]
.nextrow .nextrow
BIWEIGHT %1, 0, t1 BIWEIGHT %1, 0, r2
movh [r0 ], m0 movh [r0 ], m0
movhps [r0+t1], m0 movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
cglobal h264_biweight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
......
This diff is collapsed.
...@@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o ...@@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o
# input/output devices # input/output devices
OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \ OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \
alsa-audio-dec.o alsa-audio-dec.o timefilter.o
OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \ OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \
alsa-audio-enc.o alsa-audio-enc.o
OBJS-$(CONFIG_BKTR_INDEV) += bktr.o OBJS-$(CONFIG_BKTR_INDEV) += bktr.o
...@@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \ ...@@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \
dshow_pin.o dshow_common.o dshow_pin.o dshow_common.o
OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o
OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o timefilter.o
OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o
OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o
OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o
...@@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o ...@@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o
SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h
SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h
TESTPROGS = timefilter
include $(SRC_PATH)/subdir.mak include $(SRC_PATH)/subdir.mak
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include <alsa/asoundlib.h> #include <alsa/asoundlib.h>
#include "config.h" #include "config.h"
#include "libavutil/log.h" #include "libavutil/log.h"
#include "libavformat/timefilter.h" #include "timefilter.h"
#include "avdevice.h" #include "avdevice.h"
/* XXX: we make the assumption that the soundcard accepts this format */ /* XXX: we make the assumption that the soundcard accepts this format */
......
...@@ -28,7 +28,8 @@ ...@@ -28,7 +28,8 @@
#include "libavutil/fifo.h" #include "libavutil/fifo.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavformat/timefilter.h" #include "libavformat/avformat.h"
#include "timefilter.h"
#include "avdevice.h" #include "avdevice.h"
/** /**
......
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
#include "config.h" #include "config.h"
#include "avformat.h"
#include "timefilter.h" #include "timefilter.h"
#include "libavutil/mem.h"
struct TimeFilter { struct TimeFilter {
/// Delay Locked Loop data. These variables refer to mathematical /// Delay Locked Loop data. These variables refer to mathematical
......
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef AVFORMAT_TIMEFILTER_H #ifndef AVDEVICE_TIMEFILTER_H
#define AVFORMAT_TIMEFILTER_H #define AVDEVICE_TIMEFILTER_H
/** /**
* Opaque type representing a time filter state * Opaque type representing a time filter state
...@@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *); ...@@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *);
*/ */
void ff_timefilter_destroy(TimeFilter *); void ff_timefilter_destroy(TimeFilter *);
#endif /* AVFORMAT_TIMEFILTER_H */ #endif /* AVDEVICE_TIMEFILTER_H */
...@@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o ...@@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o
OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o
OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o
# libavdevice dependencies
OBJS-$(CONFIG_ALSA_INDEV) += timefilter.o
OBJS-$(CONFIG_JACK_INDEV) += timefilter.o
TESTPROGS = seek timefilter TESTPROGS = seek
TOOLS = pktdumper probetest TOOLS = pktdumper probetest
include $(SRC_PATH)/subdir.mak include $(SRC_PATH)/subdir.mak
...@@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst ...@@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst
case AMF_DATA_TYPE_OBJECT: { case AMF_DATA_TYPE_OBJECT: {
unsigned int keylen; unsigned int keylen;
if (vstream && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1) if ((vstream || astream) && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream, max_pos) < 0) if (parse_keyframes_index(s, ioc, vstream ? vstream : astream,
max_pos) < 0)
av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n"); av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n");
while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) { while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) {
......
...@@ -60,10 +60,13 @@ typedef struct FLVContext { ...@@ -60,10 +60,13 @@ typedef struct FLVContext {
int64_t duration_offset; int64_t duration_offset;
int64_t filesize_offset; int64_t filesize_offset;
int64_t duration; int64_t duration;
int delay; ///< first dts delay for AVC
int64_t last_ts;
} FLVContext; } FLVContext;
/**
 * Per-stream muxer state. One instance is allocated for each stream in
 * flv_write_header() and stored in AVStream.priv_data, so that the
 * timestamp bookkeeping is tracked separately for every stream.
 */
typedef struct FLVStreamContext {
int delay; ///< first dts delay for each stream (needed for AVC & Speex)
int64_t last_ts; ///< last timestamp for each stream
} FLVStreamContext;
static int get_audio_flags(AVCodecContext *enc){ static int get_audio_flags(AVCodecContext *enc){
int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT; int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT;
...@@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s) ...@@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s)
for(i=0; i<s->nb_streams; i++){ for(i=0; i<s->nb_streams; i++){
AVCodecContext *enc = s->streams[i]->codec; AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO) { if (enc->codec_type == AVMEDIA_TYPE_VIDEO) {
if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) { if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) {
framerate = av_q2d(s->streams[i]->r_frame_rate); framerate = av_q2d(s->streams[i]->r_frame_rate);
...@@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s) ...@@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s)
return -1; return -1;
} }
av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */ av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */
sc = av_mallocz(sizeof(FLVStreamContext));
if (!sc)
return AVERROR(ENOMEM);
s->streams[i]->priv_data = sc;
sc->last_ts = -1;
} }
avio_write(pb, "FLV", 3); avio_write(pb, "FLV", 3);
avio_w8(pb,1); avio_w8(pb,1);
...@@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s) ...@@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s)
} }
} }
flv->last_ts = -1;
/* write meta_tag */ /* write meta_tag */
avio_w8(pb, 18); // tag type META avio_w8(pb, 18); // tag type META
metadata_size_pos= avio_tell(pb); metadata_size_pos= avio_tell(pb);
...@@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s) ...@@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s)
/* Add EOS tag */ /* Add EOS tag */
for (i = 0; i < s->nb_streams; i++) { for (i = 0; i < s->nb_streams; i++) {
AVCodecContext *enc = s->streams[i]->codec; AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc = s->streams[i]->priv_data;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO && if (enc->codec_type == AVMEDIA_TYPE_VIDEO &&
(enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) { (enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) {
put_avc_eos_tag(pb, flv->last_ts); put_avc_eos_tag(pb, sc->last_ts);
} }
} }
...@@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
AVIOContext *pb = s->pb; AVIOContext *pb = s->pb;
AVCodecContext *enc = s->streams[pkt->stream_index]->codec; AVCodecContext *enc = s->streams[pkt->stream_index]->codec;
FLVContext *flv = s->priv_data; FLVContext *flv = s->priv_data;
FLVStreamContext *sc = s->streams[pkt->stream_index]->priv_data;
unsigned ts; unsigned ts;
int size= pkt->size; int size= pkt->size;
uint8_t *data= NULL; uint8_t *data= NULL;
...@@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n"); av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n");
return -1; return -1;
} }
if (!flv->delay && pkt->dts < 0) if (!sc->delay && pkt->dts < 0)
flv->delay = -pkt->dts; sc->delay = -pkt->dts;
ts = pkt->dts + flv->delay; // add delay to force positive dts ts = pkt->dts + sc->delay; // add delay to force positive dts
/* check Speex packet duration */ /* check Speex packet duration */
if (enc->codec_id == CODEC_ID_SPEEX && ts - flv->last_ts > 160) { if (enc->codec_id == CODEC_ID_SPEEX && ts - sc->last_ts > 160) {
av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than " av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than "
"8 frames per packet. Adobe Flash " "8 frames per packet. Adobe Flash "
"Player cannot handle this!\n"); "Player cannot handle this!\n");
} }
if (flv->last_ts < ts) if (sc->last_ts < ts)
flv->last_ts = ts; sc->last_ts = ts;
avio_wb24(pb,size + flags_size); avio_wb24(pb,size + flags_size);
avio_wb24(pb,ts); avio_wb24(pb,ts);
...@@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
avio_write(pb, data ? data : pkt->data, size); avio_write(pb, data ? data : pkt->data, size);
avio_wb32(pb,size+flags_size+11); // previous tag size avio_wb32(pb,size+flags_size+11); // previous tag size
flv->duration = FFMAX(flv->duration, pkt->pts + flv->delay + pkt->duration); flv->duration = FFMAX(flv->duration, pkt->pts + sc->delay + pkt->duration);
avio_flush(pb); avio_flush(pb);
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "riff.h" #include "riff.h"
#include "isom.h" #include "isom.h"
#include "libavcodec/get_bits.h" #include "libavcodec/get_bits.h"
#include "id3v1.h"
#if CONFIG_ZLIB #if CONFIG_ZLIB
#include <zlib.h> #include <zlib.h>
...@@ -99,31 +100,48 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, ...@@ -99,31 +100,48 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
return 0; return 0;
} }
static int mov_metadata_int8(MOVContext *c, AVIOContext *pb, static int mov_metadata_int8_bypass_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key) unsigned len, const char *key)
{ {
char buf[16]; char buf[16];
/* bypass padding bytes */
avio_r8(pb);
avio_r8(pb);
avio_r8(pb);
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb));
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0;
}
/* bypass padding bytes */ static int mov_metadata_int8_no_padding(MOVContext *c, AVIOContext *pb,
avio_r8(pb); unsigned len, const char *key)
avio_r8(pb); {
avio_r8(pb); char buf[16];
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); snprintf(buf, sizeof(buf), "%hu", avio_r8(pb));
av_dict_set(&c->fc->metadata, key, buf, 0); av_dict_set(&c->fc->metadata, key, buf, 0);
return 0; return 0;
} }
static int mov_metadata_stik(MOVContext *c, AVIOContext *pb, static int mov_metadata_gnre(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key) unsigned len, const char *key)
{ {
char buf[16]; short genre;
char buf[20];
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); avio_r8(pb); // unknown
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0; genre = avio_r8(pb);
if (genre < 1 || genre > ID3v1_GENRE_MAX)
return 0;
snprintf(buf, sizeof(buf), "%s", ff_id3v1_genre_str[genre-1]);
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0;
} }
static const uint32_t mac_to_unicode[128] = { static const uint32_t mac_to_unicode[128] = {
...@@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) ...@@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG(0xa9,'a','l','b'): key = "album"; break; case MKTAG(0xa9,'a','l','b'): key = "album"; break;
case MKTAG(0xa9,'d','a','y'): key = "date"; break; case MKTAG(0xa9,'d','a','y'): key = "date"; break;
case MKTAG(0xa9,'g','e','n'): key = "genre"; break; case MKTAG(0xa9,'g','e','n'): key = "genre"; break;
case MKTAG( 'g','n','r','e'): key = "genre";
parse = mov_metadata_gnre; break;
case MKTAG(0xa9,'t','o','o'): case MKTAG(0xa9,'t','o','o'):
case MKTAG(0xa9,'s','w','r'): key = "encoder"; break; case MKTAG(0xa9,'s','w','r'): key = "encoder"; break;
case MKTAG(0xa9,'e','n','c'): key = "encoder"; break; case MKTAG(0xa9,'e','n','c'): key = "encoder"; break;
...@@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) ...@@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG( 'd','i','s','k'): key = "disc"; case MKTAG( 'd','i','s','k'): key = "disc";
parse = mov_metadata_track_or_disc_number; break; parse = mov_metadata_track_or_disc_number; break;
case MKTAG( 't','v','e','s'): key = "episode_sort"; case MKTAG( 't','v','e','s'): key = "episode_sort";
parse = mov_metadata_int8; break; parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 't','v','s','n'): key = "season_number"; case MKTAG( 't','v','s','n'): key = "season_number";
parse = mov_metadata_int8; break; parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 's','t','i','k'): key = "media_type"; case MKTAG( 's','t','i','k'): key = "media_type";
parse = mov_metadata_stik; break; parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'h','d','v','d'): key = "hd_video";
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'p','g','a','p'): key = "gapless_playback";
parse = mov_metadata_int8_no_padding; break;
} }
if (c->itunes_metadata && atom.size > 8) { if (c->itunes_metadata && atom.size > 8) {
......
...@@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = { ...@@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
}, },
.flags = PIX_FMT_BE, .flags = PIX_FMT_BE,
}, },
[PIX_FMT_YUV422P9LE] = {
.name = "yuv422p9le",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
},
[PIX_FMT_YUV422P9BE] = {
.name = "yuv422p9be",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P10LE] = { [PIX_FMT_YUV422P10LE] = {
.name = "yuv422p10le", .name = "yuv422p10le",
.nb_components= 3, .nb_components= 3,
......
...@@ -149,12 +149,15 @@ enum PixelFormat { ...@@ -149,12 +149,15 @@ enum PixelFormat {
PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R. PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R.
PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
}; };
...@@ -182,6 +185,7 @@ enum PixelFormat { ...@@ -182,6 +185,7 @@ enum PixelFormat {
#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE) #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
#define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE) #define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
#define PIX_FMT_YUV422P9 PIX_FMT_NE(YUV422P9BE , YUV422P9LE)
#define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE) #define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
#define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE) #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
#define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE) #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE)
......
...@@ -536,6 +536,18 @@ ...@@ -536,6 +536,18 @@
%endif %endif
%endmacro %endmacro
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE 1
shufps %1, %1, 0
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro CLIPW 3 ;(dst, min, max) %macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2 pmaxsw %1, %2
pminsw %1, %3 pminsw %1, %3
......
...@@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ...@@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
...@@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ...@@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
#else #else
case PIX_FMT_YUV444P9BE: case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:
...@@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ...@@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
switch (srcFormat) { switch (srcFormat) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
...@@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ...@@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
#else #else
case PIX_FMT_YUV444P9BE: case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:
......
...@@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format); ...@@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format);
#define isNBPS(x) ( \ #define isNBPS(x) ( \
(x)==PIX_FMT_YUV420P9LE \ (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \ || (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P10BE \
...@@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format); ...@@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format);
#define isPlanarYUV(x) ( \ #define isPlanarYUV(x) ( \
isPlanar8YUV(x) \ isPlanar8YUV(x) \
|| (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV422P10LE \
...@@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format); ...@@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \ || (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV420P10BE \ || (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P10BE \
......
...@@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { ...@@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV420P9LE] = { 1 , 1 }, [PIX_FMT_YUV420P9LE] = { 1 , 1 },
[PIX_FMT_YUV420P10BE] = { 1 , 1 }, [PIX_FMT_YUV420P10BE] = { 1 , 1 },
[PIX_FMT_YUV420P10LE] = { 1 , 1 }, [PIX_FMT_YUV420P10LE] = { 1 , 1 },
[PIX_FMT_YUV422P9BE] = { 1 , 1 },
[PIX_FMT_YUV422P9LE] = { 1 , 1 },
[PIX_FMT_YUV422P10BE] = { 1 , 1 }, [PIX_FMT_YUV422P10BE] = { 1 , 1 },
[PIX_FMT_YUV422P10LE] = { 1 , 1 }, [PIX_FMT_YUV422P10LE] = { 1 , 1 },
[PIX_FMT_YUV444P9BE] = { 1 , 1 }, [PIX_FMT_YUV444P9BE] = { 1 , 1 },
...@@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi ...@@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if (flags & SWS_BICUBIC) { if (flags & SWS_BICUBIC) {
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
int64_t dd = ( d*d)>>30;
int64_t ddd= (dd*d)>>30;
if (d < 1LL<<30) if (d >= 1LL<<31) {
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); coeff = 0.0;
else if (d < 1LL<<31) } else {
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); int64_t dd = (d * d) >> 30;
else int64_t ddd = (dd * d) >> 30;
coeff=0.0;
if (d < 1LL<<30)
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
else
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
}
coeff *= fone>>(30+24); coeff *= fone>>(30+24);
} }
/* else if (flags & SWS_X) { /* else if (flags & SWS_X) {
......
...@@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int ...@@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
y_table32 = c->yuvTable; y_table32 = c->yuvTable;
yb = -(384<<16) - oy; yb = -(384<<16) - oy;
for (i = 0; i < 1024; i++) { for (i = 0; i < 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase)); y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
y_table32[i+1024] = yval << gbase; y_table32[i+1024] = yval << gbase;
y_table32[i+2048] = yval << bbase; y_table32[i+2048] = yval << bbase;
yb += cy; yb += cy;
......
...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 ...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 ...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 ...@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
...@@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603 ...@@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603
yuv422p10le a10c4a5837547716f13cd61918b145f9 yuv422p10le a10c4a5837547716f13cd61918b145f9
yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c
yuv422p16le 61bfcee8e54465f760164f5a75d40b5e yuv422p16le 61bfcee8e54465f760164f5a75d40b5e
yuv422p9be 82494823944912f73cebc58ad2979bbd
yuv422p9le fc69c8a21f473916a4b4225636b97e06
yuv440p 461503fdb9b90451020aa3b25ddf041c yuv440p 461503fdb9b90451020aa3b25ddf041c
yuv444p 81b2eba962d12e8d64f003ac56f6faf2 yuv444p 81b2eba962d12e8d64f003ac56f6faf2
yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9 yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9
......
...@@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f ...@@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f
yuv422p10le 11b57f2bd9661024153f3973b9090cdb yuv422p10le 11b57f2bd9661024153f3973b9090cdb
yuv422p16be c092d083548c2a144c372a98c46875c7 yuv422p16be c092d083548c2a144c372a98c46875c7
yuv422p16le c071b9397a416d51cbe339345cbcba84 yuv422p16le c071b9397a416d51cbe339345cbcba84
yuv422p9be 7c6f1e140b3999ee7d923854e507752a
yuv422p9le 51f10d79c07989060dd06e767e6d7d60
yuv440p 876385e96165acf51271b20e5d85a416 yuv440p 876385e96165acf51271b20e5d85a416
yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7 yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7
yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment