Commit aedc9086 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  flvdec: Do not call parse_keyframes_index with a NULL stream
  libspeexdec: include system headers before local headers
  libspeexdec: return meaningful error codes
  libspeexdec: cosmetics: reindent
  libspeexdec: decode one frame at a time.
  swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables()
  Move timefilter code from lavf to lavd.
  mov: add support for hdvd and pgapmetadata atoms
  mov: rename function _stik, some indentation cosmetics
  mov: rename function _int8 to remove ambiguity, some indentation cosmetics
  mov: parse the gnre atom
  mp3on4: check for allocation failures in decode_init_mp3on4()
  mp3on4: create a separate flush function for MP3onMP4.
  mp3on4: ensure that the frame channel count does not exceed the codec channel count.
  mp3on4: set channel layout
  mp3on4: fix the output channel order
  mp3on4: allocate temp buffer with av_malloc() instead of on the stack.
  mp3on4: copy MPADSPContext from first context to all contexts.
  fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
  fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
  ...

Conflicts:
	libavcodec/arm/h264dsp_init_arm.c
	libavcodec/h264.c
	libavcodec/h264.h
	libavcodec/h264_cabac.c
	libavcodec/h264_cavlc.c
	libavcodec/h264_ps.c
	libavcodec/h264dsp_template.c
	libavcodec/h264idct_template.c
	libavcodec/h264pred.c
	libavcodec/h264pred_template.c
	libavcodec/x86/h264dsp_mmx.c
	libavdevice/Makefile
	libavdevice/jack_audio.c
	libavformat/Makefile
	libavformat/flvdec.c
	libavformat/flvenc.c
	libavutil/pixfmt.h
	libswscale/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 1a7090bf f4b51d06
......@@ -67,6 +67,7 @@ easier to use. The changes are:
- aevalsrc audio source added
- Ut Video decoder
- Speex encoding via libspeex
- 4:2:2 H.264 decoding support
version 0.8:
......
......@@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int log2_den, int weight, int offset);
void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int height, int log2_den, int weightd,
int weights, int offset);
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
......@@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
}
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
......
......@@ -1592,7 +1592,7 @@ endfunc
vdup.8 d1, r5
vmov q2, q8
vmov q3, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d20-d21},[r0,:128], r2
\macd q2, d0, d20
pld [r0]
......@@ -1632,7 +1632,7 @@ endfunc
vdup.8 d1, r5
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #2
1: subs r3, r3, #2
vld1.8 {d4},[r0,:64], r2
\macd q1, d0, d4
pld [r0]
......@@ -1662,7 +1662,7 @@ endfunc
vdup.8 d1, r5
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #4
1: subs r3, r3, #4
vld1.32 {d4[0]},[r0,:32], r2
vld1.32 {d4[1]},[r0,:32], r2
\macd q1, d0, d4
......@@ -1700,16 +1700,17 @@ endfunc
.endm
.macro biweight_func w
function biweight_h264_pixels_\w\()_neon
function ff_biweight_h264_pixels_\w\()_neon, export=1
push {r4-r6, lr}
add r4, sp, #16
ldr r12, [sp, #16]
add r4, sp, #20
ldm r4, {r4-r6}
lsr lr, r4, #31
add r6, r6, #1
eors lr, lr, r5, lsr #30
orr r6, r6, #1
vdup.16 q9, r3
lsl r6, r6, r3
vdup.16 q9, r12
lsl r6, r6, r12
vmvn q9, q9
vdup.16 q8, r6
mov r6, r0
......@@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon
endfunc
.endm
.macro biweight_entry w, h, b=1
function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
biweight_entry 16, 8
biweight_entry 16, 16, b=0
biweight_func 16
biweight_entry 8, 16
biweight_entry 8, 4
biweight_entry 8, 8, b=0
biweight_func 8
biweight_entry 4, 8
biweight_entry 4, 2
biweight_entry 4, 4, b=0
biweight_func 4
@ Weighted prediction
.macro weight_16 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d20-d21},[r0,:128], r1
vmull.u8 q2, d0, d20
pld [r0]
......@@ -1785,8 +1767,8 @@ endfunc
.endm
.macro weight_8 add
vdup.8 d0, r3
1: subs ip, ip, #2
vdup.8 d0, r12
1: subs r2, r2, #2
vld1.8 {d4},[r0,:64], r1
vmull.u8 q1, d0, d4
pld [r0]
......@@ -1806,10 +1788,10 @@ endfunc
.endm
.macro weight_4 add
vdup.8 d0, r3
vdup.8 d0, r12
vmov q1, q8
vmov q10, q8
1: subs ip, ip, #4
1: subs r2, r2, #4
vld1.32 {d4[0]},[r0,:32], r1
vld1.32 {d4[1]},[r0,:32], r1
vmull.u8 q1, d0, d4
......@@ -1842,50 +1824,32 @@ endfunc
.endm
.macro weight_func w
function weight_h264_pixels_\w\()_neon
function ff_weight_h264_pixels_\w\()_neon, export=1
push {r4, lr}
ldr r4, [sp, #8]
cmp r2, #1
lsl r4, r4, r2
ldr r12, [sp, #8]
ldr r4, [sp, #12]
cmp r3, #1
lsl r4, r4, r3
vdup.16 q8, r4
mov r4, r0
ble 20f
rsb lr, r2, #1
rsb lr, r3, #1
vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f
weight_\w vhadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vhsub.s16
20: rsb lr, r2, #0
20: rsb lr, r3, #0
vdup.16 q9, lr
cmp r3, #0
cmp r12, #0
blt 10f
weight_\w vadd.s16
10: rsb r3, r3, #0
10: rsb r12, r12, #0
weight_\w vsub.s16
endfunc
.endm
.macro weight_entry w, h, b=1
function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b weight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
weight_entry 16, 8
weight_entry 16, 16, b=0
weight_func 16
weight_entry 8, 16
weight_entry 8, 4
weight_entry 8, 8, b=0
weight_func 8
weight_entry 4, 8
weight_entry 4, 2
weight_entry 4, 4, b=0
weight_func 4
......@@ -70,7 +70,15 @@ typedef struct FmtConvertContext {
long len, int channels);
/**
* Convert an array of interleaved float to multiple arrays of float.
* Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/
void (*float_interleave)(float *dst, const float **src, unsigned int len,
int channels);
......
This diff is collapsed.
......@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
int is_dc, int chroma422)
{
static const int significant_coeff_flag_offset[2][14] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
......@@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
* map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
{ 5, 5, 5, 5, 6, 7, 8, 9 },
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
};
static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after decoding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 },
......@@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off);
} else {
if (is_dc && max_coeff == 8) { // dc 422
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
......@@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
} else {
if (is_dc && max_coeff == 8) { // dc 422
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else {
DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
......@@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} \
} else { \
int coeff_abs = 2; \
if (is_dc && max_coeff == 8) \
node_ctx = FFMIN(node_ctx, 6); \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
......@@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
}
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
}
/**
 * Non-inlined entry point for decoding a DC residual block with the
 * chroma422 flag set.
 *
 * Forwards to decode_cabac_residual_internal() with a NULL qmul table,
 * is_dc = 1 and chroma422 = 1; all other arguments are passed through.
 */
static void decode_cabac_residual_dc_internal_422(H264Context *h,
                                                  DCTELEM *block,
                                                  int cat, int n,
                                                  const uint8_t *scantable,
                                                  int max_coeff)
{
    decode_cabac_residual_internal(h, block, cat, n, scantable,
                                   NULL,      /* qmul */
                                   max_coeff,
                                   1,         /* is_dc */
                                   1);        /* chroma422 */
}
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
}
/* cat: 0-> DC 16x16 n = 0
......@@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
}
/**
 * Read the coded block flag for a DC block and, if it is set, decode the
 * block through the chroma422 DC path.
 *
 * When the flag is 0 the cached non-zero count for block n is cleared and
 * no coefficients are decoded.
 */
static av_always_inline void
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
                             int cat, int n, const uint8_t *scantable,
                             int max_coeff)
{
    /* CABAC context index of the coded block flag (is_dc = 1) */
    const int cbf_ctx = get_cabac_cbf_ctx(h, cat, n, max_coeff, 1);

    if (get_cabac(&h->cabac, &h->cabac_state[cbf_ctx])) {
        decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff);
        return;
    }

    /* coded block flag is 0: record that this block has no coefficients */
    h->non_zero_count_cache[scan8[n]] = 0;
}
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* read coded block flag */
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
......@@ -2325,17 +2351,14 @@ decode_intra_mb:
if(CHROMA444){
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
} else {
const int num_c8x8 = h->sps.chroma_format_idc;
} else if (CHROMA422) {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX+c,
CHROMA422 ? chroma422_dc_scan : chroma_dc_scan,
4*num_c8x8);
decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX + c,
chroma422_dc_scan, 8);
}
}
......@@ -2344,7 +2367,7 @@ decode_intra_mb:
for( c = 0; c < 2; c++ ) {
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) {
for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i = 0; i < 4; i++) {
const int index = 16 + 16 * c + 8*i8x8 + i;
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
......@@ -2357,6 +2380,29 @@ decode_intra_mb:
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} else /* yuv420 */ {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 16 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
......
......@@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
\
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
......
......@@ -31,16 +31,18 @@
#include "dsputil.h"
//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset);
/**
* Context for storing H.264 DSP functions
*/
typedef struct H264DSPContext{
/* weighted MC */
h264_weight_func weight_h264_pixels_tab[10];
h264_biweight_func biweight_h264_pixels_tab[10];
h264_weight_func weight_h264_pixels_tab[4];
h264_biweight_func biweight_h264_pixels_tab[4];
/* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
......
......@@ -29,14 +29,16 @@
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int stride, int log2_denom, int weight, int offset){ \
#define H264_WEIGHT(W) \
static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
int log2_denom, int weight, int offset) \
{ \
int y; \
pixel *block = (pixel*)p_block; \
pixel *block = (pixel*)_block; \
stride >>= sizeof(pixel)-1; \
offset <<= (log2_denom + (BIT_DEPTH-8)); \
if(log2_denom) offset += 1<<(log2_denom-1); \
for(y=0; y<H; y++, block += stride){ \
for (y = 0; y < height; y++, block += stride) { \
op_scale1(0); \
op_scale1(1); \
if(W==2) continue; \
......@@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid
op_scale1(15); \
} \
} \
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \
static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset) \
{ \
int y; \
pixel *dst = (pixel*)_dst; \
pixel *src = (pixel*)_src; \
stride >>= sizeof(pixel)-1; \
offset <<= (BIT_DEPTH-8); \
offset = ((offset + 1) | 1) << log2_denom; \
for(y=0; y<H; y++, dst += stride, src += stride){ \
for (y = 0; y < height; y++, dst += stride, src += stride) { \
op_scale2(0); \
op_scale2(1); \
if(W==2) continue; \
......@@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
} \
}
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)
H264_WEIGHT(16)
H264_WEIGHT(8)
H264_WEIGHT(4)
H264_WEIGHT(2)
#undef op_scale1
#undef op_scale2
......
......@@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
int i, j;
#if 0
av_log(NULL, AV_LOG_INFO, "idct\n");
int32_t *b = block;
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
for(j=1; j<3; j++){
for(i=j*16; i<j*16+4; i++){
if(nnzc[ scan8[i] ])
......@@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
#undef stride
}
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int i;
int temp[8];
static const uint8_t x_offset[2]={0, 16};
dctcoef *block = (dctcoef*)p_block;
dctcoef *block = (dctcoef*)_block;
for(i=0; i<4; i++){
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
......@@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
}
#if 0
av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
}
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int a,b,c,d,e;
dctcoef *block = (dctcoef*)p_block;
dctcoef *block = (dctcoef*)_block;
a= block[stride*0 + xStride*0];
b= block[stride*0 + xStride*1];
......
......@@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
}\
}else{\
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
......@@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
if (chroma_format_idc == 1) {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
} else {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\
}\
h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\
......
......@@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred4x4_dc)(src, NULL, stride);
}
/* 8x16 variant: run the 8x16 top-DC predictor on the block, then run the
 * 4x4 DC predictor on the 4x4 sub-block at the block origin. */
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride)
{
    FUNCC(pred8x16_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
/* 8x8 variant: run the 8x8 DC predictor on the block, then run the 4x4
 * top-DC predictor on the 4x4 sub-block at the block origin. */
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride)
{
    FUNCC(pred8x8_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
/* 8x16 variant: run the 8x16 DC predictor on the block, then run the 4x4
 * top-DC predictor on the 4x4 sub-block at the block origin. */
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride)
{
    FUNCC(pred8x16_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
/* 8x8 variant: run the 8x8 left-DC predictor on the block, then run the
 * 4x4 "128 DC" predictor on the two 4x4 sub-blocks starting at row 4. */
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride)
{
    uint8_t *const row4 = src + 4*stride;

    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(row4, NULL, stride);
    FUNCC(pred4x4_128_dc)(row4 + 4*sizeof(pixel), NULL, stride);
}
/* 8x16 variant: run the 8x16 left-DC predictor on the block, then run the
 * 4x4 "128 DC" predictor on the two 4x4 sub-blocks starting at row 4. */
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride)
{
    uint8_t *const row4 = src + 4*stride;

    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(row4, NULL, stride);
    FUNCC(pred4x4_128_dc)(row4 + 4*sizeof(pixel), NULL, stride);
}
/* 8x8 variant: run the 8x8 left-DC predictor on the block, then run the
 * 4x4 "128 DC" predictor on the two 4x4 sub-blocks of the top row. */
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride)
{
    uint8_t *const right_half = src + 4*sizeof(pixel);

    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src, NULL, stride);
    FUNCC(pred4x4_128_dc)(right_half, NULL, stride);
}
/* 8x16 variant: run the 8x16 left-DC predictor on the block, then run the
 * 4x4 "128 DC" predictor on the two 4x4 sub-blocks of the top row. */
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride)
{
    uint8_t *const right_half = src + 4*sizeof(pixel);

    FUNCC(pred8x16_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src, NULL, stride);
    FUNCC(pred4x4_128_dc)(right_half, NULL, stride);
}
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
int j, k;
int a;
......@@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i;
for(i=0; i<4; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
for(i=4; i<8; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
}
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i;
for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
}
static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i;
for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
for(i=4; i<8; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
}
......@@ -18,11 +18,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include <speex/speex.h>
#include <speex/speex_header.h>
#include <speex/speex_stereo.h>
#include <speex/speex_callbacks.h>
#include "avcodec.h"
typedef struct {
SpeexBits bits;
......@@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
mode = speex_lib_get_mode(s->header->mode);
if (!mode) {
av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode);
return -1;
return AVERROR_INVALIDDATA;
}
} else
av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n");
if (avctx->channels > 2) {
av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n");
return -1;
return AVERROR(EINVAL);
}
speex_bits_init(&s->bits);
......@@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx,
uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
LibSpeexContext *s = avctx->priv_data;
int16_t *output = data, *end;
int i, num_samples;
num_samples = s->frame_size * avctx->channels;
end = output + *data_size / sizeof(*output);
int16_t *output = data;
int out_size, ret, consumed = 0;
/* check output buffer size */
out_size = s->frame_size * avctx->channels *
av_get_bytes_per_sample(avctx->sample_fmt);
if (*data_size < out_size) {
av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
return AVERROR(EINVAL);
}
/* if there is not enough data left for the smallest possible frame,
reset the libspeex buffer using the current packet, otherwise ignore
the current packet and keep decoding frames from the libspeex buffer. */
if (speex_bits_remaining(&s->bits) < 43) {
/* check for flush packet */
if (!buf || !buf_size) {
*data_size = 0;
return buf_size;
}
/* set new buffer */
speex_bits_read_from(&s->bits, buf, buf_size);
consumed = buf_size;
}
for (i = 0; speex_bits_remaining(&s->bits) && output + num_samples < end; i++) {
int ret = speex_decode_int(s->dec_state, &s->bits, output);
/* decode a single frame */
ret = speex_decode_int(s->dec_state, &s->bits, output);
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return -1;
} else if (ret == -1)
// end of stream
break;
return AVERROR_INVALIDDATA;
}
if (avctx->channels == 2)
speex_decode_stereo_int(output, s->frame_size, &s->stereo);
output += num_samples;
}
avctx->frame_size = s->frame_size * i;
*data_size = avctx->channels * avctx->frame_size * sizeof(*output);
return buf_size;
*data_size = out_size;
return consumed;
}
static av_cold int libspeex_decode_close(AVCodecContext *avctx)
......@@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx)
return 0;
}
/**
 * Flush callback of the libspeex decoder: resets the SpeexBits buffer kept
 * in the private context.
 */
static av_cold void libspeex_decode_flush(AVCodecContext *avctx)
{
    LibSpeexContext *ctx = avctx->priv_data;

    speex_bits_reset(&ctx->bits);
}
AVCodec ff_libspeex_decoder = {
.name = "libspeex",
.type = AVMEDIA_TYPE_AUDIO,
......@@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = {
.init = libspeex_decode_init,
.close = libspeex_decode_close,
.decode = libspeex_decode_frame,
.flush = libspeex_decode_flush,
.capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY,
.long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
};
......@@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext {
int syncword; ///< syncword patch
const uint8_t *coff; ///< channels offsets in output buffer
MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
OUT_INT *decoded_buf; ///< output buffer for decoded samples
} MP3On4DecodeContext;
#include "mpeg4audio.h"
/* Next 3 arrays are indexed by channel config number (passed via codecdata) */
static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */
/* offsets into output buffer, assume output order is FL FR BL BR C LFE */
/* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
static const uint8_t chan_offset[8][5] = {
{0},
{0}, // C
{0}, // FLR
{2,0}, // C FLR
{2,0,3}, // C FLR BS
{4,0,2}, // C FLR BLRS
{4,0,2,5}, // C FLR BLRS LFE
{4,0,2,6,5}, // C FLR BLRS BLR LFE
{2,0,3}, // C FLR BLRS
{2,0,4,3}, // C FLR BLRS LFE
{2,0,6,4,3}, // C FLR BLRS BLR LFE
};
/*
 * mp3on4 channel layouts, indexed by the channel configuration number
 * (the same index used for mp3Frames[] and chan_offset[]).
 * Entry 0 has no layout and is left as 0.
 */
static const int16_t chan_layout[8] = {
    [0] = 0,
    [1] = AV_CH_LAYOUT_MONO,
    [2] = AV_CH_LAYOUT_STEREO,
    [3] = AV_CH_LAYOUT_SURROUND,
    [4] = AV_CH_LAYOUT_4POINT0,
    [5] = AV_CH_LAYOUT_5POINT0,
    [6] = AV_CH_LAYOUT_5POINT1,
    [7] = AV_CH_LAYOUT_7POINT1,
};
/**
 * Free everything owned by the MP3onMP4 decoder.
 *
 * Releases each per-frame MPADecodeContext and the intermediate output
 * buffer. This function is also used as the cleanup step when an allocation
 * fails in decode_init_mp3on4(), so every pointer is cleared after it is
 * freed (av_freep) to keep a later close call from freeing it twice.
 *
 * @param avctx codec context; priv_data is the MP3On4DecodeContext
 * @return always 0
 */
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i;

    /* av_freep() frees and NULLs the slot; NULL slots are harmless to free */
    for (i = 0; i < s->frames; i++)
        av_freep(&s->mp3decctx[i]);

    av_freep(&s->decoded_buf);

    return 0;
}
static int decode_init_mp3on4(AVCodecContext * avctx)
{
......@@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
s->frames = mp3Frames[cfg.chan_config];
s->coff = chan_offset[cfg.chan_config];
avctx->channels = ff_mpeg4audio_channels[cfg.chan_config];
avctx->channel_layout = chan_layout[cfg.chan_config];
if (cfg.sample_rate < 16000)
s->syncword = 0xffe00000;
......@@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/
// Allocate zeroed memory for the first decoder context
s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[0])
goto alloc_fail;
// Put decoder context in place to make init_decode() happy
avctx->priv_data = s->mp3decctx[0];
decode_init(avctx);
......@@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/
for (i = 1; i < s->frames; i++) {
s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[i])
goto alloc_fail;
s->mp3decctx[i]->adu_mode = 1;
s->mp3decctx[i]->avctx = avctx;
s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
}
/* Allocate buffer for multi-channel output if needed */
if (s->frames > 1) {
s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS *
sizeof(*s->decoded_buf));
if (!s->decoded_buf)
goto alloc_fail;
}
return 0;
alloc_fail:
decode_close_mp3on4(avctx);
return AVERROR(ENOMEM);
}
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
static void flush_mp3on4(AVCodecContext *avctx)
{
MP3On4DecodeContext *s = avctx->priv_data;
int i;
MP3On4DecodeContext *s = avctx->priv_data;
for (i = 0; i < s->frames; i++)
av_free(s->mp3decctx[i]);
return 0;
for (i = 0; i < s->frames; i++) {
MPADecodeContext *m = s->mp3decctx[i];
memset(m->synth_buf, 0, sizeof(m->synth_buf));
m->last_buf_size = 0;
}
}
......@@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
int fsize, len = buf_size, out_size = 0;
uint32_t header;
OUT_INT *out_samples = data;
OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
OUT_INT *outptr, *bp;
int fr, j, n;
int fr, j, n, ch;
if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT))
return -1;
if (*data_size < MPA_FRAME_SIZE * avctx->channels * sizeof(OUT_INT)) {
av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
return AVERROR(EINVAL);
}
*data_size = 0;
// Discard too short frames
......@@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
return -1;
// If only one decoder interleave is not needed
outptr = s->frames == 1 ? out_samples : decoded_buf;
outptr = s->frames == 1 ? out_samples : s->decoded_buf;
avctx->bit_rate = 0;
ch = 0;
for (fr = 0; fr < s->frames; fr++) {
fsize = AV_RB16(buf) >> 4;
fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
......@@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
break;
avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
if (ch + m->nb_channels > avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
"channel count\n");
return AVERROR_INVALIDDATA;
}
ch += m->nb_channels;
out_size += mp_decode_frame(m, outptr, buf, fsize);
buf += fsize;
len -= fsize;
......@@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
bp = out_samples + s->coff[fr];
if(m->nb_channels == 1) {
for(j = 0; j < n; j++) {
*bp = decoded_buf[j];
*bp = s->decoded_buf[j];
bp += avctx->channels;
}
} else {
for(j = 0; j < n; j++) {
bp[0] = decoded_buf[j++];
bp[1] = decoded_buf[j];
bp[0] = s->decoded_buf[j++];
bp[1] = s->decoded_buf[j];
bp += avctx->channels;
}
}
......@@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = {
.init = decode_init_mp3on4,
.close = decode_close_mp3on4,
.decode = decode_frame_mp3on4,
.flush = flush,
.flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
};
#endif
......
......@@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = {
.init = decode_init_mp3on4,
.close = decode_close_mp3on4,
.decode = decode_frame_mp3on4,
.flush = flush,
.flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
};
#endif
......@@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
}
static av_always_inline
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
void weight_h264_W_altivec(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset, int w)
{
int y, aligned;
vec_u8 vblock;
......@@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
voffset = vec_splat(vtemp, 5);
aligned = !((unsigned long)block & 0xf);
for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vblock = vec_ld(0, block);
v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
......@@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
}
static av_always_inline
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
int weightd, int weights, int offset, int w, int h)
void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset, int w)
{
int y, dst_aligned, src_aligned;
vec_u8 vsrc, vdst;
......@@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
dst_aligned = !((unsigned long)dst & 0xf);
src_aligned = !((unsigned long)src & 0xf);
for (y=0; y<h; y++) {
for (y = 0; y < height; y++) {
vdst = vec_ld(0, dst);
vsrc = vec_ld(0, src);
......@@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
}
}
#define H264_WEIGHT(W,H) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
#define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
int log2_denom, int weight, int offset){ \
weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
}
H264_WEIGHT(16,16)
H264_WEIGHT(16, 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
H264_WEIGHT(16)
H264_WEIGHT( 8)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
......@@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
}
}
}
......@@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV444P9LE:
......
......@@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s)
av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer);
av_freep(&s->top_border);
av_freep(&s->segmentation_map);
s->macroblocks = NULL;
}
static void vp8_decode_flush(AVCodecContext *avctx)
/**
 * Obtain a picture buffer for f and attach a segmentation map to it.
 *
 * The map is stored in f->ref_index[0]. A map queued in
 * s->segmentation_maps is reused when the queue is non-empty and still
 * valid; otherwise a zeroed map of mb_width * mb_height bytes is allocated.
 *
 * @param s decoder context
 * @param f frame to set up
 * @return 0 on success, the error from ff_thread_get_buffer() if it fails,
 *         or AVERROR(ENOMEM) if the map cannot be allocated (the picture
 *         buffer is released again in that case)
 */
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int err = ff_thread_get_buffer(s->avctx, f);

    if (err < 0)
        return err;

    /* recycle a queued map if one is available and still usable */
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
        return 0;
    }

    f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height);
    if (!f->ref_index[0]) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }

    return 0;
}
/**
 * Release a frame's picture buffer and dispose of its segmentation map.
 *
 * When the decoder is closing, the map in f->ref_index[0] is freed right
 * away. Otherwise a non-NULL map is detached from the frame and appended to
 * s->segmentation_maps, to be freed or reused later.
 *
 * @param s        decoder context
 * @param f        frame to release
 * @param is_close non-zero when called while closing the decoder
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
{
    if (is_close) {
        av_freep(&f->ref_index[0]);
    } else if (f->ref_index[0]) {
        /* defer: hand the map over to the pending queue */
        assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
        s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
        f->ref_index[0] = NULL;
    }

    ff_thread_release_buffer(s->avctx, f);
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
{
VP8Context *s = avctx->priv_data;
int i;
if (!avctx->is_copy) {
if (!avctx->is_copy || force) {
for (i = 0; i < 5; i++)
if (s->frames[i].data[0])
ff_thread_release_buffer(avctx, &s->frames[i]);
vp8_release_frame(s, &s->frames[i], is_close);
}
memset(s->framep, 0, sizeof(s->framep));
free_buffers(s);
s->maps_are_invalid = 1;
}
/**
 * Codec flush callback: a plain flush, neither forced nor part of closing
 * the decoder.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int force    = 0;
    const int is_close = 0;

    vp8_decode_flush_impl(avctx, force, is_close);
}
static int update_dimensions(VP8Context *s, int width, int height)
......@@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
if (av_image_check_size(width, height, 0, s->avctx))
return AVERROR_INVALIDDATA;
vp8_decode_flush(s->avctx);
vp8_decode_flush_impl(s->avctx, 1, 0);
avcodec_set_dimensions(s->avctx, width, height);
}
......@@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
!s->top_nnz || !s->top_border || !s->segmentation_map)
!s->top_nnz || !s->top_border)
return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1;
......@@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
}
}
/**
 * Free segmentation maps that were queued by earlier frame releases.
 *
 * On close every queued map is freed. Otherwise one map is left in the
 * queue as long as the queued maps are still valid, and all of them are
 * freed if they have been marked invalid. The invalid flag is cleared
 * afterwards.
 *
 * @param s        decoder context
 * @param is_close non-zero when called while closing the decoder
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    if (!is_close && !s->maps_are_invalid)
        keep = 1;

    while (s->num_maps_to_be_freed > keep) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt)
{
......@@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret;
......@@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
ff_thread_release_buffer(avctx, &s->frames[i]);
vp8_release_frame(s, &s->frames[i], 0);
// find a free buffer
for (i = 0; i < 5; i++)
......@@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0;
curframe->ref_index[0] = s->segmentation_map;
if ((ret = ff_thread_get_buffer(avctx, curframe))) {
if ((ret = vp8_alloc_frame(s, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
return ret;
}
......@@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
......@@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
vp8_decode_flush(avctx);
vp8_decode_flush_impl(avctx, 0, 1);
release_queued_segmaps(avctx->priv_data, 1);
return 0;
}
......
......@@ -130,7 +130,6 @@ typedef struct {
uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/**
* Macroblocks can have one of 4 different quants in a frame when
......@@ -237,6 +236,16 @@ typedef struct {
H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[5];
/**
* A list of segmentation_map buffers that are to be free()'ed in
* the next decoding iteration. We can't free() them right away
* because the map may still be used by subsequent decoding threads.
* Unused if frame threading is off.
*/
uint8_t *segmentation_maps[5];
int num_maps_to_be_freed;
int maps_are_invalid;
} VP8Context;
#endif /* AVCODEC_VP8_H */
......@@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2
......
......@@ -24,6 +24,146 @@
SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
; Emits int32_to_float_fmul_scalar_<%1>: dst[i] = (float)src[i] * mul.
;   %1 = instruction-set suffix; "sse2" selects the cvtdq2ps path, anything
;        else the cvtpi2ps path
;   %2 = forwarded to cglobal as its register-count field
; Each loop iteration converts 32 bytes (8 elements); there is no tail
; handling, so len is presumably expected to be a multiple of 8 - TODO confirm.
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%ifdef ARCH_X86_64
; only three arguments are declared here and mul is never loaded;
; presumably it already arrives in xmm0 (m0) under the calling convention
; NOTE(review): confirm this also holds for Win64
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
%else
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
movss m0, mulm
%endif
; SPLATD is %define'd by the instantiating code; presumably it broadcasts the
; scalar in m0 to every lane
SPLATD m0
; len *= 4: element count -> byte count
shl lenq, 2
; point src/dst at the end of their buffers and run a negative byte offset
; up towards zero
add srcq, lenq
add dstq, lenq
neg lenq
.loop:
%ifidn %1, sse2
cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16]
%else
; cvtpi2ps converts two ints at a time; movlhps joins the pairs into
; full four-float vectors
cvtpi2ps m1, [srcq+lenq ]
cvtpi2ps m3, [srcq+lenq+ 8]
cvtpi2ps m2, [srcq+lenq+16]
cvtpi2ps m4, [srcq+lenq+24]
movlhps m1, m3
movlhps m2, m4
%endif
mulps m1, m0
mulps m2, m0
mova [dstq+lenq ], m1
mova [dstq+lenq+16], m2
add lenq, 32
; loop while the offset is still negative
jl .loop
REP_RET
%endmacro
INIT_XMM
%define SPLATD SPLATD_SSE
%define movdqa movaps
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
%undef movdqa
%define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; Emits float_to_int16_<%1>: converts len floats from src to int16 in dst,
; saturating via packssdw.
;   %1 = instruction-set suffix; "sse2" selects the XMM path, anything else
;        the cvtps2pi path
;   %2 = forwarded to cglobal as its register-count field
; Each loop iteration produces 16 bytes of output (8 samples); there is no
; tail handling, so len is presumably expected to be a multiple of 8 - TODO
; confirm.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
; lenq = 2*len = output size in bytes; the input is twice that (4 bytes/float)
add lenq, lenq
lea srcq, [srcq+2*lenq]
add dstq, lenq
; run a negative byte offset from the buffer ends up towards zero
neg lenq
.loop:
%ifidn %1, sse2
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
mova [dstq+lenq], m0
%else
; two floats per cvtps2pi, packed to four int16 per register
cvtps2pi m0, [srcq+2*lenq ]
cvtps2pi m1, [srcq+2*lenq+ 8]
cvtps2pi m2, [srcq+2*lenq+16]
cvtps2pi m3, [srcq+2*lenq+24]
packssdw m0, m1
packssdw m2, m3
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m2
%endif
add lenq, 16
; loop while the offset is still negative
js .loop
%ifnidn %1, sse2
; the non-sse2 variants are instantiated under INIT_MMX; emms clears the
; MMX state before returning
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
; Emits float_to_int16_interleave2_<%1>: converts two float channels
; (src[0], src[1]) to int16 and writes them interleaved to dst.
;   %1 = instruction-set suffix; "sse2" selects the XMM path, anything else
;        the cvtps2pi path
; Each loop iteration consumes 16 bytes (4 floats) from each channel and
; writes 16 bytes (4 interleaved sample pairs); there is no tail handling, so
; len is presumably expected to be a multiple of 4 - TODO confirm.
%macro FLOAT_TO_INT16_INTERLEAVE2 1
; three arguments, four registers: the third argument register (r2, the C
; "len") is named src1 here and reused for the second channel pointer
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
; lenq = 4*len bytes, taken from r2 before r2 is overwritten below
lea lenq, [4*r2q]
; fetch the two channel pointers from the src array; src[1] first because
; src0q still holds the array base
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
; point everything at the buffer ends and run a negative offset up to zero
add dstq, lenq
add src0q, lenq
add src1q, lenq
neg lenq
.loop:
%ifidn %1, sse2
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
; m0 = four channel-0 samples (low half) | four channel-1 samples (high half)
packssdw m0, m1
; bring the channel-1 half down into m1, then interleave word by word
movhlps m1, m0
punpcklwd m0, m1
mova [dstq+lenq], m0
%else
cvtps2pi m0, [src0q+lenq ]
cvtps2pi m1, [src0q+lenq+8]
cvtps2pi m2, [src1q+lenq ]
cvtps2pi m3, [src1q+lenq+8]
; m0 = four channel-0 samples, m2 = four channel-1 samples
packssdw m0, m1
packssdw m2, m3
; interleave the low and high sample pairs
mova m1, m0
punpcklwd m0, m2
punpckhwd m1, m2
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m1
%endif
add lenq, 16
; loop while the offset is still negative
js .loop
%ifnidn %1, sse2
; the non-sse2 variants are instantiated under INIT_MMX; emms clears the
; MMX state before returning
emms
%endif
REP_RET
%endmacro
INIT_MMX
%define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow
%undef cvtps2pi
%define movdqa movaps
FLOAT_TO_INT16_INTERLEAVE2 sse
%undef movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e
%endmacro
......
This diff is collapsed.
......@@ -28,21 +28,20 @@ SECTION .text
;-----------------------------------------------------------------------------
; biweight pred:
;
; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int log2_denom, int weightd, int weights,
; int offset);
; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int height, int log2_denom, int weightd,
; int weights, int offset);
; and
; void h264_weight_16x16_sse2(uint8_t *dst, int stride,
; int log2_denom, int weight,
; int offset);
; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
; int log2_denom, int weight, int offset);
;-----------------------------------------------------------------------------
%macro WEIGHT_SETUP 0
add r4, r4
inc r4
movd m3, r3d
movd m5, r4d
movd m6, r2d
add r5, r5
inc r5
movd m3, r4d
movd m5, r5d
movd m6, r3d
pslld m5, m6
psrld m5, 1
%if mmsize == 16
......@@ -71,60 +70,41 @@ SECTION .text
packuswb m0, m1
%endmacro
%macro WEIGHT_FUNC_DBL_MM 1
cglobal h264_weight_16x%1_mmx2, 5, 5, 0
INIT_MMX
cglobal h264_weight_16_mmx2, 6, 6, 0
WEIGHT_SETUP
mov r2, %1
%if %1 == 16
.nextrow
WEIGHT_OP 0, 4
mova [r0 ], m0
WEIGHT_OP 8, 12
mova [r0+8], m0
add r0, r1
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_DBL_MM 16
WEIGHT_FUNC_DBL_MM 8
%macro WEIGHT_FUNC_MM 4
cglobal h264_weight_%1x%2_%4, 7, 7, %3
%macro WEIGHT_FUNC_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP
mov r2, %2
%if %2 == 16
.nextrow
WEIGHT_OP 0, mmsize/2
mova [r0], m0
add r0, r1
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_MM 8, 16, 0, mmx2
WEIGHT_FUNC_MM 8, 8, 0, mmx2
WEIGHT_FUNC_MM 8, 4, 0, mmx2
WEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM
WEIGHT_FUNC_MM 16, 16, 8, sse2
WEIGHT_FUNC_MM 16, 8, 8, sse2
WEIGHT_FUNC_MM 16, 8, sse2
%macro WEIGHT_FUNC_HALF_MM 5
cglobal h264_weight_%1x%2_%5, 5, 5, %4
%macro WEIGHT_FUNC_HALF_MM 3
cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP
mov r2, %2/2
sar r2d, 1
lea r3, [r1*2]
%if %2 == mmsize
.nextrow
WEIGHT_OP 0, r1
movh [r0], m0
......@@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4
movh [r0+r1], m0
%endif
add r0, r3
dec r2
dec r2d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
%endif
%endmacro
INIT_MMX
WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM
WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m3, r4d
movd m4, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m3, r5d
movd m4, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
%if mmsize == 16
......@@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m1
%endmacro
%macro BIWEIGHT_FUNC_DBL_MM 1
cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
INIT_MMX
cglobal h264_biweight_16_mmx2, 7, 7, 0
BIWEIGHT_SETUP
mov r3, %1
%if %1 == 16
movifnidn r3d, r3m
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, 4
......@@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
mova [r0+8], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_DBL_MM 16
BIWEIGHT_FUNC_DBL_MM 8
%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
%macro BIWEIGHT_FUNC_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP
mov r3, %2
%if %2 == 16
movifnidn r3d, r3m
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, mmsize/2
......@@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3
mova [r0], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_MM 8, 16, 0, mmx2
BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
BIWEIGHT_FUNC_MM 8, 0, mmx2
INIT_XMM
BIWEIGHT_FUNC_MM 16, 16, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, sse2
%macro BIWEIGHT_FUNC_HALF_MM 5
cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP
mov r3, %2/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2]
%if %2 == mmsize
.nextrow
BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, r2
......@@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%endif
add r0, r4
add r1, r4
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro
INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SSSE3_SETUP 0
add r6, 1
or r6, 1
add r3, 1
movd m4, r4d
movd m0, r5d
movd m5, r6d
movd m6, r3d
%ifdef ARCH_X86_64
%define off_regd r11d
%else
%define off_regd r3d
%endif
mov off_regd, r7m
add off_regd, 1
or off_regd, 1
add r4, 1
movd m4, r5d
movd m0, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
punpcklbw m4, m0
......@@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m2
%endmacro
%macro BIWEIGHT_SSSE3_16 1
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
INIT_XMM
cglobal h264_biweight_16_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP
mov r3, %1
movifnidn r3d, r3m
%if %1 == 16
.nextrow
movh m0, [r0]
movh m2, [r0+8]
......@@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
mova [r0], m0
add r0, r2
add r1, r2
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_16 16
BIWEIGHT_SSSE3_16 8
%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
cglobal h264_biweight_8_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP
mov r3, %1/2
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2]
%if %1 == 16
.nextrow
movh m0, [r0]
movh m1, [r1]
......@@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
movhps [r0+r2], m0
add r0, r4
add r1, r4
dec r3
dec r3d
jnz .nextrow
REP_RET
%else
jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8 8
BIWEIGHT_SSSE3_8 4
......@@ -36,33 +36,26 @@ cextern pw_1
SECTION .text
;-----------------------------------------------------------------------------
; void h264_weight(uint8_t *dst, int stride, int log2_denom,
; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
; int weight, int offset);
;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32
DECLARE_REG_TMP 2
%else
DECLARE_REG_TMP 10
%endif
%macro WEIGHT_PROLOGUE 1
mov t0, %1
%macro WEIGHT_PROLOGUE 0
.prologue
PROLOGUE 0,5,8
PROLOGUE 0,6,8
movifnidn r0, r0mp
movifnidn r1d, r1m
movifnidn r3d, r3m
movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro
%macro WEIGHT_SETUP 1
mova m0, [pw_1]
movd m2, r2m
movd m2, r3m
pslld m0, m2 ; 1<<log2_denom
SPLATW m0, m0
shl r4, 19 ; *8, move to upper half of dword
lea r4, [r4+r3*2+0x10000]
movd m3, r4d ; weight<<1 | 1+(offset<<(3))
shl r5, 19 ; *8, move to upper half of dword
lea r5, [r5+r4*2+0x10000]
movd m3, r5d ; weight<<1 | 1+(offset<<(3))
pshufd m3, m3, 0
mova m4, [pw_pixel_max]
paddw m2, [sq_1] ; log2_denom+1
......@@ -96,8 +89,8 @@ DECLARE_REG_TMP 10
%endmacro
%macro WEIGHT_FUNC_DBL 1
cglobal h264_weight_16x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_16_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1
.nextrow
WEIGHT_OP %1, 0
......@@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1
WEIGHT_OP %1, 16
mova [r0+16], m5
add r0, r1
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_16x16_10_%1.prologue)
%endmacro
INIT_XMM
......@@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4
%macro WEIGHT_FUNC_MM 1
cglobal h264_weight_8x16_10_%1
WEIGHT_PROLOGUE 16
cglobal h264_weight_8_10_%1
WEIGHT_PROLOGUE
WEIGHT_SETUP %1
.nextrow
WEIGHT_OP %1, 0
mova [r0], m5
add r0, r1
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
cglobal h264_weight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
%endmacro
INIT_XMM
......@@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4
%macro WEIGHT_FUNC_HALF_MM 1
cglobal h264_weight_4x8_10_%1
WEIGHT_PROLOGUE 4
cglobal h264_weight_4_10_%1
WEIGHT_PROLOGUE
sar r2d, 1
WEIGHT_SETUP %1
lea r3, [r1*2]
.nextrow
......@@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1
movh [r0], m5
movhps [r0+r1], m5
add r0, r3
dec t0
dec r2d
jnz .nextrow
REP_RET
cglobal h264_weight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
cglobal h264_weight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
%endmacro
INIT_XMM
......@@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4
;-----------------------------------------------------------------------------
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
; int weightd, int weights, int offset);
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int log2_denom, int weightd, int weights, int offset);
;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32
DECLARE_REG_TMP 2,3
DECLARE_REG_TMP 3
%else
DECLARE_REG_TMP 10,2
DECLARE_REG_TMP 10
%endif
%macro BIWEIGHT_PROLOGUE 1
mov t0, %1
%macro BIWEIGHT_PROLOGUE 0
.prologue
PROLOGUE 0,7,8
movifnidn r0, r0mp
movifnidn r1, r1mp
movifnidn t1d, r2m
movifnidn r4d, r4m
movifnidn r2d, r2m
movifnidn r5d, r5m
movifnidn r6d, r6m
movifnidn t0d, r7m
%endmacro
%macro BIWEIGHT_SETUP 1
lea r6, [r6*4+1] ; (offset<<2)+1
or r6, 1
shl r5, 16
or r4, r5
movd m4, r4d ; weightd | weights
movd m5, r6d ; (offset+1)|1
movd m6, r3m ; log2_denom
lea t0, [t0*4+1] ; (offset<<2)+1
or t0, 1
shl r6, 16
or r5, r6
movd m4, r5d ; weightd | weights
movd m5, t0d ; (offset+1)|1
movd m6, r4m ; log2_denom
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
paddd m6, [sq_1]
pshufd m4, m4, 0
pshufd m5, m5, 0
mova m3, [pw_pixel_max]
movifnidn r3d, r3m
%ifnidn %1, sse4
pxor m7, m7
%endif
......@@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2
%endmacro
%macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_16_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
.nextrow
BIWEIGHT %1, 0
mova [r0 ], m0
BIWEIGHT %1, 16
mova [r0+16], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
%endmacro
INIT_XMM
......@@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4
%macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8x16_10_%1
BIWEIGHT_PROLOGUE 16
cglobal h264_biweight_8_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
.nextrow
BIWEIGHT %1, 0
mova [r0], m0
add r0, t1
add r1, t1
dec t0
add r0, r2
add r1, r2
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
cglobal h264_biweight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
%endmacro
INIT_XMM
......@@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4
%macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4x8_10_%1
BIWEIGHT_PROLOGUE 4
cglobal h264_biweight_4_10_%1
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1
lea r4, [t1*2]
sar r3d, 1
lea r4, [r2*2]
.nextrow
BIWEIGHT %1, 0, t1
BIWEIGHT %1, 0, r2
movh [r0 ], m0
movhps [r0+t1], m0
movhps [r0+r2], m0
add r0, r4
add r1, r4
dec t0
dec r3d
jnz .nextrow
REP_RET
cglobal h264_biweight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
cglobal h264_biweight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
%endmacro
INIT_XMM
......
This diff is collapsed.
......@@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o
# input/output devices
OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \
alsa-audio-dec.o
alsa-audio-dec.o timefilter.o
OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \
alsa-audio-enc.o
OBJS-$(CONFIG_BKTR_INDEV) += bktr.o
......@@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \
dshow_pin.o dshow_common.o
OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o
OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o timefilter.o
OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o
OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o
OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o
......@@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o
SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h
SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h
TESTPROGS = timefilter
include $(SRC_PATH)/subdir.mak
......@@ -33,7 +33,7 @@
#include <alsa/asoundlib.h>
#include "config.h"
#include "libavutil/log.h"
#include "libavformat/timefilter.h"
#include "timefilter.h"
#include "avdevice.h"
/* XXX: we make the assumption that the soundcard accepts this format */
......
......@@ -28,7 +28,8 @@
#include "libavutil/fifo.h"
#include "libavutil/opt.h"
#include "libavcodec/avcodec.h"
#include "libavformat/timefilter.h"
#include "libavformat/avformat.h"
#include "timefilter.h"
#include "avdevice.h"
/**
......
......@@ -24,8 +24,8 @@
#include "config.h"
#include "avformat.h"
#include "timefilter.h"
#include "libavutil/mem.h"
struct TimeFilter {
/// Delay Locked Loop data. These variables refer to mathematical
......
......@@ -22,8 +22,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFORMAT_TIMEFILTER_H
#define AVFORMAT_TIMEFILTER_H
#ifndef AVDEVICE_TIMEFILTER_H
#define AVDEVICE_TIMEFILTER_H
/**
* Opaque type representing a time filter state
......@@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *);
*/
void ff_timefilter_destroy(TimeFilter *);
#endif /* AVFORMAT_TIMEFILTER_H */
#endif /* AVDEVICE_TIMEFILTER_H */
......@@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o
OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o
OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o
# libavdevice dependencies
OBJS-$(CONFIG_ALSA_INDEV) += timefilter.o
OBJS-$(CONFIG_JACK_INDEV) += timefilter.o
TESTPROGS = seek timefilter
TESTPROGS = seek
TOOLS = pktdumper probetest
include $(SRC_PATH)/subdir.mak
......@@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst
case AMF_DATA_TYPE_OBJECT: {
unsigned int keylen;
if (vstream && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream, max_pos) < 0)
if ((vstream || astream) && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream ? vstream : astream,
max_pos) < 0)
av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n");
while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) {
......
......@@ -60,10 +60,13 @@ typedef struct FLVContext {
int64_t duration_offset;
int64_t filesize_offset;
int64_t duration;
int delay; ///< first dts delay for AVC
int64_t last_ts;
} FLVContext;
/**
 * Per-stream muxer state.
 *
 * flv_write_header() allocates one zero-initialised instance for every
 * stream, stores it in the stream's priv_data and sets last_ts to -1.
 * flv_write_packet() and flv_write_trailer() read it back from there.
 */
typedef struct FLVStreamContext {
int delay; ///< first dts delay for each stream (needed for AVC & Speex)
int64_t last_ts; ///< last timestamp for each stream
} FLVStreamContext;
static int get_audio_flags(AVCodecContext *enc){
int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT;
......@@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s)
for(i=0; i<s->nb_streams; i++){
AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO) {
if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) {
framerate = av_q2d(s->streams[i]->r_frame_rate);
......@@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s)
return -1;
}
av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */
sc = av_mallocz(sizeof(FLVStreamContext));
if (!sc)
return AVERROR(ENOMEM);
s->streams[i]->priv_data = sc;
sc->last_ts = -1;
}
avio_write(pb, "FLV", 3);
avio_w8(pb,1);
......@@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s)
}
}
flv->last_ts = -1;
/* write meta_tag */
avio_w8(pb, 18); // tag type META
metadata_size_pos= avio_tell(pb);
......@@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s)
/* Add EOS tag */
for (i = 0; i < s->nb_streams; i++) {
AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc = s->streams[i]->priv_data;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO &&
(enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) {
put_avc_eos_tag(pb, flv->last_ts);
put_avc_eos_tag(pb, sc->last_ts);
}
}
......@@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
AVIOContext *pb = s->pb;
AVCodecContext *enc = s->streams[pkt->stream_index]->codec;
FLVContext *flv = s->priv_data;
FLVStreamContext *sc = s->streams[pkt->stream_index]->priv_data;
unsigned ts;
int size= pkt->size;
uint8_t *data= NULL;
......@@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n");
return -1;
}
if (!flv->delay && pkt->dts < 0)
flv->delay = -pkt->dts;
if (!sc->delay && pkt->dts < 0)
sc->delay = -pkt->dts;
ts = pkt->dts + flv->delay; // add delay to force positive dts
ts = pkt->dts + sc->delay; // add delay to force positive dts
/* check Speex packet duration */
if (enc->codec_id == CODEC_ID_SPEEX && ts - flv->last_ts > 160) {
if (enc->codec_id == CODEC_ID_SPEEX && ts - sc->last_ts > 160) {
av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than "
"8 frames per packet. Adobe Flash "
"Player cannot handle this!\n");
}
if (flv->last_ts < ts)
flv->last_ts = ts;
if (sc->last_ts < ts)
sc->last_ts = ts;
avio_wb24(pb,size + flags_size);
avio_wb24(pb,ts);
......@@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
avio_write(pb, data ? data : pkt->data, size);
avio_wb32(pb,size+flags_size+11); // previous tag size
flv->duration = FFMAX(flv->duration, pkt->pts + flv->delay + pkt->duration);
flv->duration = FFMAX(flv->duration, pkt->pts + sc->delay + pkt->duration);
avio_flush(pb);
......
......@@ -35,6 +35,7 @@
#include "riff.h"
#include "isom.h"
#include "libavcodec/get_bits.h"
#include "id3v1.h"
#if CONFIG_ZLIB
#include <zlib.h>
......@@ -99,7 +100,7 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
return 0;
}
static int mov_metadata_int8(MOVContext *c, AVIOContext *pb,
static int mov_metadata_int8_bypass_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key)
{
char buf[16];
......@@ -115,7 +116,7 @@ static int mov_metadata_int8(MOVContext *c, AVIOContext *pb,
return 0;
}
static int mov_metadata_stik(MOVContext *c, AVIOContext *pb,
static int mov_metadata_int8_no_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key)
{
char buf[16];
......@@ -126,6 +127,23 @@ static int mov_metadata_stik(MOVContext *c, AVIOContext *pb,
return 0;
}
/**
 * Read a 'gnre' metadata payload and store the matching genre name.
 *
 * Two bytes are consumed from pb: the first is skipped, the second is
 * interpreted as a 1-based index into ff_id3v1_genre_str[]. Values outside
 * the range 1..ID3v1_GENRE_MAX are ignored and no metadata entry is written.
 *
 * @param c   demuxer context; the entry is added to c->fc->metadata
 * @param pb  I/O context the two bytes are read from
 * @param len payload length (not used by this parser)
 * @param key metadata key under which the genre name is stored
 * @return always 0
 */
static int mov_metadata_gnre(MOVContext *c, AVIOContext *pb,
                             unsigned len, const char *key)
{
    short genre;

    avio_r8(pb); // unknown

    genre = avio_r8(pb);
    if (genre < 1 || genre > ID3v1_GENRE_MAX)
        return 0;

    /* av_dict_set() duplicates the value when called without
     * AV_DICT_DONT_STRDUP_VAL, so hand it the table entry directly instead
     * of going through a fixed-size stack buffer that would silently
     * truncate long genre names. */
    av_dict_set(&c->fc->metadata, key, ff_id3v1_genre_str[genre - 1], 0);

    return 0;
}
static const uint32_t mac_to_unicode[128] = {
0x00C4,0x00C5,0x00C7,0x00C9,0x00D1,0x00D6,0x00DC,0x00E1,
0x00E0,0x00E2,0x00E4,0x00E3,0x00E5,0x00E7,0x00E9,0x00E8,
......@@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG(0xa9,'a','l','b'): key = "album"; break;
case MKTAG(0xa9,'d','a','y'): key = "date"; break;
case MKTAG(0xa9,'g','e','n'): key = "genre"; break;
case MKTAG( 'g','n','r','e'): key = "genre";
parse = mov_metadata_gnre; break;
case MKTAG(0xa9,'t','o','o'):
case MKTAG(0xa9,'s','w','r'): key = "encoder"; break;
case MKTAG(0xa9,'e','n','c'): key = "encoder"; break;
......@@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG( 'd','i','s','k'): key = "disc";
parse = mov_metadata_track_or_disc_number; break;
case MKTAG( 't','v','e','s'): key = "episode_sort";
parse = mov_metadata_int8; break;
parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 't','v','s','n'): key = "season_number";
parse = mov_metadata_int8; break;
parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 's','t','i','k'): key = "media_type";
parse = mov_metadata_stik; break;
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'h','d','v','d'): key = "hd_video";
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'p','g','a','p'): key = "gapless_playback";
parse = mov_metadata_int8_no_padding; break;
}
if (c->itunes_metadata && atom.size > 8) {
......
......@@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P9LE] = {
.name = "yuv422p9le",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
},
[PIX_FMT_YUV422P9BE] = {
.name = "yuv422p9be",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P10LE] = {
.name = "yuv422p10le",
.nb_components= 3,
......
......@@ -149,12 +149,15 @@ enum PixelFormat {
PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R.
PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
......@@ -182,6 +185,7 @@ enum PixelFormat {
#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
#define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
#define PIX_FMT_YUV422P9 PIX_FMT_NE(YUV422P9BE , YUV422P9LE)
#define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
#define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
#define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE)
......
......@@ -536,6 +536,18 @@
%endif
%endmacro
; SPLATD_MMX reg
; Interleaves the low dword of %1 with itself (punpckldq). On a 64-bit MMX
; register this leaves both dwords equal to the original low dword.
; The single argument is used as source and destination.
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
; SPLATD_SSE reg
; Copies dword 0 of %1 into all four dword lanes using shufps with a
; selector of 0 (every lane picks element 0).
; The single argument is used as source and destination.
%macro SPLATD_SSE 1
shufps %1, %1, 0
%endmacro
; SPLATD_SSE2 reg
; Copies dword 0 of %1 into all four dword lanes using pshufd with a
; selector of 0 (every lane picks element 0).
; The single argument is used as source and destination.
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
pminsw %1, %3
......
......@@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
......@@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
......@@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
switch (srcFormat) {
#if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
......@@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
#else
case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
......
......@@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format);
#define isNBPS(x) ( \
(x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV422P10BE \
......@@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format);
#define isPlanarYUV(x) ( \
isPlanar8YUV(x) \
|| (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV422P10LE \
......@@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV422P10BE \
......
......@@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV420P9LE] = { 1 , 1 },
[PIX_FMT_YUV420P10BE] = { 1 , 1 },
[PIX_FMT_YUV420P10LE] = { 1 , 1 },
[PIX_FMT_YUV422P9BE] = { 1 , 1 },
[PIX_FMT_YUV422P9LE] = { 1 , 1 },
[PIX_FMT_YUV422P10BE] = { 1 , 1 },
[PIX_FMT_YUV422P10LE] = { 1 , 1 },
[PIX_FMT_YUV444P9BE] = { 1 , 1 },
......@@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if (flags & SWS_BICUBIC) {
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
int64_t dd = ( d*d)>>30;
int64_t ddd= (dd*d)>>30;
if (d >= 1LL<<31) {
coeff = 0.0;
} else {
int64_t dd = (d * d) >> 30;
int64_t ddd = (dd * d) >> 30;
if (d < 1LL<<30)
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
else if (d < 1LL<<31)
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
else
coeff=0.0;
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
}
coeff *= fone>>(30+24);
}
/* else if (flags & SWS_X) {
......
......@@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
y_table32 = c->yuvTable;
yb = -(384<<16) - oy;
for (i = 0; i < 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase));
unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
y_table32[i+1024] = yval << gbase;
y_table32[i+2048] = yval << bbase;
yb += cy;
......
......@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
......@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
......@@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6
......
......@@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603
yuv422p10le a10c4a5837547716f13cd61918b145f9
yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c
yuv422p16le 61bfcee8e54465f760164f5a75d40b5e
yuv422p9be 82494823944912f73cebc58ad2979bbd
yuv422p9le fc69c8a21f473916a4b4225636b97e06
yuv440p 461503fdb9b90451020aa3b25ddf041c
yuv444p 81b2eba962d12e8d64f003ac56f6faf2
yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9
......
......@@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f
yuv422p10le 11b57f2bd9661024153f3973b9090cdb
yuv422p16be c092d083548c2a144c372a98c46875c7
yuv422p16le c071b9397a416d51cbe339345cbcba84
yuv422p9be 7c6f1e140b3999ee7d923854e507752a
yuv422p9le 51f10d79c07989060dd06e767e6d7d60
yuv440p 876385e96165acf51271b20e5d85a416
yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7
yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment