Commit 76741b0e authored by Baptiste Coudurier's avatar Baptiste Coudurier Committed by Ronald S. Bultje

h264: 4:2:2 intra decoding support

Signed-off-by: 's avatarDiego Biurrun <diego@biurrun.de>
Signed-off-by: 's avatarRonald S. Bultje <rsbultje@gmail.com>
parent dc49bf12
...@@ -54,6 +54,7 @@ easier to use. The changes are: ...@@ -54,6 +54,7 @@ easier to use. The changes are:
- boxblur filter - boxblur filter
- Ut Video decoder - Ut Video decoder
- Speex encoding via libspeex - Speex encoding via libspeex
- 4:2:2 H.264 decoding support
version 0.7: version 0.7:
......
...@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, ...@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, DCTELEM *block, int stride,
const uint8_t nnzc[6*8]); const uint8_t nnzc[6*8]);
static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
if (bit_depth == 8) { if (bit_depth == 8) {
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
...@@ -122,6 +122,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) ...@@ -122,6 +122,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
c->h264_idct_add16 = ff_h264_idct_add16_neon; c->h264_idct_add16 = ff_h264_idct_add16_neon;
c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_neon; c->h264_idct_add8 = ff_h264_idct_add8_neon;
c->h264_idct8_add = ff_h264_idct8_add_neon; c->h264_idct8_add = ff_h264_idct8_add_neon;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
...@@ -129,7 +130,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) ...@@ -129,7 +130,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
} }
} }
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
} }
...@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); ...@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth) static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {
const int high_depth = bit_depth > 8; const int high_depth = bit_depth > 8;
...@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b ...@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
} }
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth) void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc)
{ {
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
} }
...@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid ...@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid
void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
H264_IDCT( 8) H264_IDCT( 8)
......
This diff is collapsed.
...@@ -39,13 +39,6 @@ ...@@ -39,13 +39,6 @@
#define interlaced_dct interlaced_dct_is_a_bad_name #define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_is_not_initialized_see_mb_type #define mb_intra mb_intra_is_not_initialized_see_mb_type
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6
#define MAX_SPS_COUNT 32 #define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256 #define MAX_PPS_COUNT 256
...@@ -92,6 +85,7 @@ ...@@ -92,6 +85,7 @@
#define CABAC h->pps.cabac #define CABAC h->pps.cabac
#endif #endif
#define CHROMA422 (h->sps.chroma_format_idc == 2)
#define CHROMA444 (h->sps.chroma_format_idc == 3) #define CHROMA444 (h->sps.chroma_format_idc == 3)
#define EXTENDED_SAR 255 #define EXTENDED_SAR 255
...@@ -582,6 +576,8 @@ typedef struct H264Context{ ...@@ -582,6 +576,8 @@ typedef struct H264Context{
// Timestamp stuff // Timestamp stuff
int sei_buffering_period_present; ///< Buffering period SEI flag int sei_buffering_period_present; ///< Buffering period SEI flag
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
int cur_chroma_format_idc;
}H264Context; }H264Context;
...@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){ ...@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){
AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
if(CHROMA444){ if(!h->s.chroma_y_shift){
AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
......
...@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { ...@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
}; };
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
int is_dc, int chroma422)
{
static const int significant_coeff_flag_offset[2][14] = { static const int significant_coeff_flag_offset[2][14] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
...@@ -1587,12 +1592,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1587,12 +1592,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
}; };
static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 };
/* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
* 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
* map node ctx => cabac ctx for level=1 */ * map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */ /* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
{ 5, 5, 5, 5, 6, 7, 8, 9 },
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
};
static const uint8_t coeff_abs_level_transition[2][8] = { static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after decoding a level=1 */ /* update node ctx after decoding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 }, { 1, 2, 3, 3, 4, 5, 6, 7 },
...@@ -1650,13 +1659,21 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1650,13 +1659,21 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off); last_coeff_ctx_base, sig_off);
} else {
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else { } else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
last_coeff_ctx_base-significant_coeff_ctx_base); last_coeff_ctx_base-significant_coeff_ctx_base);
}
#else #else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
} else { } else {
DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else {
DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
}
#endif #endif
} }
assert(coeff_count > 0); assert(coeff_count > 0);
...@@ -1691,7 +1708,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1691,7 +1708,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} \ } \
} else { \ } else { \
int coeff_abs = 2; \ int coeff_abs = 2; \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \ ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \ node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\ \
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
...@@ -1733,11 +1750,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT ...@@ -1733,11 +1750,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} }
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
}
static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1);
} }
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
} }
/* cat: 0-> DC 16x16 n = 0 /* cat: 0-> DC 16x16 n = 0
...@@ -1761,6 +1785,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * ...@@ -1761,6 +1785,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
} }
static av_always_inline void
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
/* read coded block flag */
if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) {
h->non_zero_count_cache[scan8[n]] = 0;
return;
}
decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff);
}
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* read coded block flag */ /* read coded block flag */
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
...@@ -2313,7 +2350,36 @@ decode_intra_mb: ...@@ -2313,7 +2350,36 @@ decode_intra_mb:
if(CHROMA444){ if(CHROMA444){
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
} else if (CHROMA422) {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX + c,
chroma422_dc_scan, 8);
}
}
if( cbp&0x20 ) {
int c, i, i8x8;
for( c = 0; c < 2; c++ ) {
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i = 0; i < 4; i++) {
const int index = 16 + 16 * c + 8*i8x8 + i;
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15);
mb += 16<<pixel_shift;
}
}
}
} else { } else {
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} else /* yuv420 */ {
if( cbp&0x30 ){ if( cbp&0x30 ){
int c; int c;
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
......
...@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={ ...@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={
2, 3, 2, 0, 2, 3, 2, 0,
}; };
static const uint8_t chroma422_dc_coeff_token_len[4*9]={
1, 0, 0, 0,
7, 2, 0, 0,
7, 7, 3, 0,
9, 7, 7, 5,
9, 9, 7, 6,
10, 10, 9, 7,
11, 11, 10, 7,
12, 12, 11, 10,
13, 12, 12, 11,
};
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
1, 0, 0, 0,
15, 1, 0, 0,
14, 13, 1, 0,
7, 12, 11, 1,
6, 5, 10, 1,
7, 6, 4, 9,
7, 6, 5, 8,
7, 6, 5, 4,
7, 5, 4, 4,
};
static const uint8_t coeff_token_len[4][4*17]={ static const uint8_t coeff_token_len[4][4*17]={
{ {
1, 0, 0, 0, 1, 0, 0, 0,
...@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= { ...@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
{ 1, 0, 0, 0,}, { 1, 0, 0, 0,},
}; };
static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
{ 1, 3, 3, 4, 4, 4, 5, 5 },
{ 3, 2, 3, 3, 3, 3, 3 },
{ 3, 3, 2, 2, 3, 3 },
{ 3, 2, 2, 2, 3 },
{ 2, 2, 2, 2 },
{ 2, 2, 1 },
{ 1, 1 },
};
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
{ 1, 2, 3, 2, 3, 1, 1, 0 },
{ 0, 1, 1, 4, 5, 6, 7 },
{ 0, 1, 1, 2, 6, 7 },
{ 6, 0, 1, 2, 7 },
{ 0, 1, 2, 3 },
{ 0, 1, 1 },
{ 0, 1 },
};
static const uint8_t run_len[7][16]={ static const uint8_t run_len[7][16]={
{1,1}, {1,1},
{1,2,2}, {1,2,2},
...@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc; ...@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256; static const int chroma_dc_coeff_token_vlc_table_size = 256;
static VLC chroma422_dc_coeff_token_vlc;
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2];
static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
static VLC total_zeros_vlc[15]; static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2]; static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512; static const int total_zeros_vlc_tables_size = 512;
...@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3]; ...@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8; static const int chroma_dc_total_zeros_vlc_tables_size = 8;
static VLC chroma422_dc_total_zeros_vlc[7];
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2];
static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
static VLC run_vlc[6]; static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2]; static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8; static const int run_vlc_tables_size = 8;
...@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96; ...@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96;
#define LEVEL_TAB_BITS 8 #define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
#define COEFF_TOKEN_VLC_BITS 8
#define TOTAL_ZEROS_VLC_BITS 9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
#define RUN_VLC_BITS 3
#define RUN7_VLC_BITS 6
/** /**
* gets the predicted number of non-zero coefficients. * gets the predicted number of non-zero coefficients.
...@@ -278,6 +338,13 @@ av_cold void ff_h264_decode_init_vlc(void){ ...@@ -278,6 +338,13 @@ av_cold void ff_h264_decode_init_vlc(void){
&chroma_dc_coeff_token_bits[0], 1, 1, &chroma_dc_coeff_token_bits[0], 1, 1,
INIT_VLC_USE_NEW_STATIC); INIT_VLC_USE_NEW_STATIC);
chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
&chroma422_dc_coeff_token_len [0], 1, 1,
&chroma422_dc_coeff_token_bits[0], 1, 1,
INIT_VLC_USE_NEW_STATIC);
offset = 0; offset = 0;
for(i=0; i<4; i++){ for(i=0; i<4; i++){
coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
...@@ -304,6 +371,17 @@ av_cold void ff_h264_decode_init_vlc(void){ ...@@ -304,6 +371,17 @@ av_cold void ff_h264_decode_init_vlc(void){
&chroma_dc_total_zeros_bits[i][0], 1, 1, &chroma_dc_total_zeros_bits[i][0], 1, 1,
INIT_VLC_USE_NEW_STATIC); INIT_VLC_USE_NEW_STATIC);
} }
for(i=0; i<7; i++){
chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
init_vlc(&chroma422_dc_total_zeros_vlc[i],
CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
&chroma422_dc_total_zeros_len [i][0], 1, 1,
&chroma422_dc_total_zeros_bits[i][0], 1, 1,
INIT_VLC_USE_NEW_STATIC);
}
for(i=0; i<15; i++){ for(i=0; i<15; i++){
total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
...@@ -373,7 +451,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in ...@@ -373,7 +451,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
//FIXME put trailing_onex into the context //FIXME put trailing_onex into the context
if(max_coeff <= 8){ if(max_coeff <= 8){
coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); if (max_coeff == 4)
coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
else
coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
total_coeff= coeff_token>>2; total_coeff= coeff_token>>2;
}else{ }else{
if(n >= LUMA_DC_BLOCK_INDEX){ if(n >= LUMA_DC_BLOCK_INDEX){
...@@ -483,12 +564,17 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in ...@@ -483,12 +564,17 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
if(total_coeff == max_coeff) if(total_coeff == max_coeff)
zeros_left=0; zeros_left=0;
else{ else{
/* FIXME: we don't actually support 4:2:2 yet. */ if (max_coeff <= 8) {
if(max_coeff <= 8) if (max_coeff == 4)
zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
else else
zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
} else {
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
} }
}
#define STORE_BLOCK(type) \ #define STORE_BLOCK(type) \
scantable += zeros_left + total_coeff - 1; \ scantable += zeros_left + total_coeff - 1; \
...@@ -994,7 +1080,7 @@ decode_intra_mb: ...@@ -994,7 +1080,7 @@ decode_intra_mb:
s->current_picture.f.mb_type[mb_xy] = mb_type; s->current_picture.f.mb_type[mb_xy] = mb_type;
if(cbp || IS_INTRA16x16(mb_type)){ if(cbp || IS_INTRA16x16(mb_type)){
int i4x4, chroma_idx; int i4x4, i8x8, chroma_idx;
int dquant; int dquant;
int ret; int ret;
GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
...@@ -1036,7 +1122,34 @@ decode_intra_mb: ...@@ -1036,7 +1122,34 @@ decode_intra_mb:
if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){ if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
return -1; return -1;
} }
} else { } else if (CHROMA422) {
if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
NULL, 8) < 0) {
return -1;
}
}
if(cbp&0x20){
for(chroma_idx=0; chroma_idx<2; chroma_idx++){
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i4x4 = 0; i4x4 < 4; i4x4++) {
const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
return -1;
mb += 16 << pixel_shift;
}
}
}
}else{
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} else /* yuv420 */ {
if(cbp&0x30){ if(cbp&0x30){
for(chroma_idx=0; chroma_idx<2; chroma_idx++) for(chroma_idx=0; chroma_idx<2; chroma_idx++)
if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
......
...@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m ...@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
int chroma444 = CHROMA444; int chroma444 = CHROMA444;
int chroma422 = CHROMA422;
int mb_xy = h->mb_xy; int mb_xy = h->mb_xy;
int left_type= h->left_type[LTOP]; int left_type= h->left_type[LTOP];
...@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m ...@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m
filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0);
} }
}else if(chroma422){
if(left_type){
filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
}
filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0);
if(top_type){
filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1);
}
filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0);
}else{ }else{
if(left_type){ if(left_type){
filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1);
...@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ ...@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
return v; return v;
} }
static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) { static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) {
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int edge; int edge;
int chroma_qp_avg[2]; int chroma_qp_avg[2];
int chroma444 = CHROMA444;
int chroma422 = CHROMA422;
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
...@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u ...@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
for( edge = 1; edge < edges; edge++ ) { for( edge = 1; edge < edges; edge++ ) {
DECLARE_ALIGNED(8, int16_t, bS)[4]; DECLARE_ALIGNED(8, int16_t, bS)[4];
int qp; int qp;
const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type)
if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) if (!deblock_edge && (!chroma422 || dir == 0))
continue; continue;
if( IS_INTRA(mb_type)) { if( IS_INTRA(mb_type)) {
...@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u ...@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
} }
} }
} else { } else {
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 ); if (chroma422) {
if (deblock_edge)
filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
if (chroma) {
filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
}
} else {
filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0);
if (chroma) { if (chroma) {
if (chroma444) { if (chroma444) {
filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
} else if( (edge&1) == 0 ) { } else if ((edge&1) == 0) {
filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0);
filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0);
}
} }
} }
} }
...@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint ...@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 ); filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 );
filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 ); filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 );
} else if (CHROMA422) {
filter_mb_mbaff_edgecv(h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1);
filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1);
filter_mb_mbaff_edgecv(h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1);
filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1);
}else{ }else{
filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 ); filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 );
filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 );
...@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint ...@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
#if CONFIG_SMALL #if CONFIG_SMALL
for( dir = 0; dir < 2; dir++ ) for( dir = 0; dir < 2; dir++ )
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir); filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir);
#else #else
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0); filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0);
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, CHROMA444, 1); filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, 1);
#endif #endif
} }
...@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
if(top_type){ if(top_type){
nnz = h->non_zero_count[top_xy]; nnz = h->non_zero_count[top_xy];
AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
if(CHROMA444){ if(!s->chroma_y_shift){
AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
}else{ }else{
...@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){ ...@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
}else if(CHROMA422) {
nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4];
nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4];
nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4];
nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4];
}else{ }else{
nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]];
nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]];
......
...@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ ...@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
#endif #endif
sps->crop= get_bits1(&s->gb); sps->crop= get_bits1(&s->gb);
if(sps->crop){ if(sps->crop){
int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8; int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8;
int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8;
sps->crop_left = get_ue_golomb(&s->gb); sps->crop_left = get_ue_golomb(&s->gb);
sps->crop_right = get_ue_golomb(&s->gb); sps->crop_right = get_ue_golomb(&s->gb);
sps->crop_top = get_ue_golomb(&s->gb); sps->crop_top = get_ue_golomb(&s->gb);
...@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ ...@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
if(sps->crop_left || sps->crop_top){ if(sps->crop_left || sps->crop_top){
av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
} }
if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){ if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){
av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
} }
}else{ }else{
......
...@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={ ...@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={
static const uint8_t chroma_dc_scan[4]={ static const uint8_t chroma_dc_scan[4]={
(0+0*2)*16, (1+0*2)*16, (0+0*2)*16, (1+0*2)*16,
(0+1*2)*16, (1+1*2)*16, //FIXME (0+1*2)*16, (1+1*2)*16,
};
static const uint8_t chroma422_dc_scan[8]={
(0+0*2)*16, (0+1*2)*16,
(1+0*2)*16, (0+2*2)*16,
(0+3*2)*16, (1+1*2)*16,
(1+2*2)*16, (1+3*2)*16,
}; };
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
#include "h264dsp_template.c" #include "h264dsp_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
#undef FUNC #undef FUNC
#define FUNC(a, depth) a ## _ ## depth ## _c #define FUNC(a, depth) a ## _ ## depth ## _c
...@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) ...@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\
c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
if (chroma_format_idc == 1)\
c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\
else\
c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\
c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
if (chroma_format_idc == 1)\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\ \
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
...@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) ...@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
if (chroma_format_idc == 1)\
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
else\
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\
if (chroma_format_idc == 1)\
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
else\
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
if (chroma_format_idc == 1)\
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
else\
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\
if (chroma_format_idc == 1)\
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
else\
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
c->h264_loop_filter_strength= NULL; c->h264_loop_filter_strength= NULL;
switch (bit_depth) { switch (bit_depth) {
...@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) ...@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
break; break;
} }
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth); if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth); if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
} }
...@@ -74,9 +74,9 @@ typedef struct H264DSPContext{ ...@@ -74,9 +74,9 @@ typedef struct H264DSPContext{
void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
}H264DSPContext; }H264DSPContext;
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth); void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth); void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth); void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth); void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
#endif /* AVCODEC_H264DSP_H */ #endif /* AVCODEC_H264DSP_H */
...@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int ...@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int
{ {
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
} }
static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
}
static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
}
static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
{ {
...@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid ...@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid
{ {
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
} }
static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
}
static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
{
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
}
...@@ -224,6 +224,29 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * ...@@ -224,6 +224,29 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
} }
} }
} }
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
int i, j;
for(j=1; j<3; j++){
for(i=j*16; i<j*16+4; i++){
if(nnzc[ scan8[i] ])
FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
else if(((dctcoef*)block)[i*16])
FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
}
}
for(j=1; j<3; j++){
for(i=j*16+4; i<j*16+8; i++){
if(nnzc[ scan8[i+4] ])
FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
else if(((dctcoef*)block)[i*16])
FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
}
}
}
/** /**
* IDCT transforms the 16 dc values and dequantizes them. * IDCT transforms the 16 dc values and dequantizes them.
* @param qmul quantization parameter * @param qmul quantization parameter
...@@ -263,6 +286,33 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int ...@@ -263,6 +286,33 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int
#undef stride #undef stride
} }
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int i;
int temp[8];
static const uint8_t x_offset[2]={0, 16};
dctcoef *block = (dctcoef*)_block;
for(i=0; i<4; i++){
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
}
for(i=0; i<2; i++){
const int offset= x_offset[i];
const int z0= temp[2*0+i] + temp[2*2+i];
const int z1= temp[2*0+i] - temp[2*2+i];
const int z2= temp[2*1+i] - temp[2*3+i];
const int z3= temp[2*1+i] + temp[2*3+i];
block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8;
block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
}
}
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){ void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2; const int stride= 16*2;
const int xStride= 16; const int xStride= 16;
......
...@@ -361,7 +361,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ ...@@ -361,7 +361,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
/** /**
* Set the intra prediction function pointers. * Set the intra prediction function pointers.
*/ */
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){
// MpegEncContext * const s = &h->s; // MpegEncContext * const s = &h->s;
#undef FUNC #undef FUNC
...@@ -434,13 +434,23 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ ...@@ -434,13 +434,23 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\
h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\
\ \
if (chroma_format_idc == 1) {\
h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\
} else {\
h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\
}\
if (codec_id != CODEC_ID_VP8) {\ if (codec_id != CODEC_ID_VP8) {\
if (chroma_format_idc == 1) {\
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\
} else {\
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\
}\
} else\ } else\
h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
if (chroma_format_idc == 1) {\
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\
...@@ -448,6 +458,15 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ ...@@ -448,6 +458,15 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
} else {\
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
}\
}else{\ }else{\
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
...@@ -457,7 +476,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ ...@@ -457,7 +476,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\
}\ }\
}\ }\
if (chroma_format_idc == 1) {\
h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\
} else {\
h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\
}\
\ \
h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\
h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\
...@@ -504,6 +527,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ ...@@ -504,6 +527,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
break; break;
} }
if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth); if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth); if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
} }
...@@ -101,8 +101,8 @@ typedef struct H264PredContext{ ...@@ -101,8 +101,8 @@ typedef struct H264PredContext{
void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
}H264PredContext; }H264PredContext;
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth); void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth); void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth); void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
#endif /* AVCODEC_H264PRED_H */ #endif /* AVCODEC_H264PRED_H */
...@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ ...@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
int i;
pixel *src = (pixel*)_src;
int stride = _stride>>(sizeof(pixel)-1);
const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
for(i=0; i<16; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
}
}
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
...@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ ...@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
int i;
pixel *src = (pixel*)_src;
stride >>= sizeof(pixel)-1;
for(i=0; i<16; i++){
const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
}
}
#define PRED8x8_X(n, v)\ #define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
int i;\ int i;\
...@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); ...@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
FUNCC(pred8x8_128_dc)(_src, stride);
FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
}
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
int i; int i;
int dc0, dc2; int dc0, dc2;
...@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ ...@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
FUNCC(pred8x8_left_dc)(_src, stride);
FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
}
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
int i; int i;
int dc0, dc1; int dc0, dc1;
...@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ ...@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
int i;
int dc0, dc1;
pixel4 dc0splat, dc1splat;
pixel *src = (pixel*)_src;
stride >>= sizeof(pixel)-1;
dc0=dc1=0;
for(i=0;i<4; i++){
dc0+= src[i-stride];
dc1+= src[4+i-stride];
}
dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
for(i=0; i<16; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
}
}
static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
int i; int i;
int dc0, dc1, dc2; int dc0, dc1, dc2;
...@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ ...@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
int i;
int dc0, dc1, dc2, dc3, dc4;
pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
pixel *src = (pixel*)_src;
stride >>= sizeof(pixel)-1;
dc0=dc1=dc2=dc3=dc4=0;
for(i=0;i<4; i++){
dc0+= src[-1+i*stride] + src[i-stride];
dc1+= src[4+i-stride];
dc2+= src[-1+(i+4)*stride];
dc3+= src[-1+(i+8)*stride];
dc4+= src[-1+(i+12)*stride];
}
dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
for(i=0; i<4; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
}
for(i=4; i<8; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
}
for(i=8; i<12; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
}
for(i=12; i<16; i++){
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
}
}
//the following 4 function should not be optimized! //the following 4 function should not be optimized!
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred8x8_top_dc)(src, stride); FUNCC(pred8x8_top_dc)(src, stride);
...@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ ...@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
int j, k;
int a;
INIT_CLIP
pixel *src = (pixel*)_src;
int stride = _stride>>(sizeof(pixel)-1);
const pixel * const src0 = src +3-stride;
const pixel * src1 = src +8*stride-1;
const pixel * src2 = src1-2*stride; // == src+6*stride-1;
int H = src0[1] - src0[-1];
int V = src1[0] - src2[ 0];
for (k = 2; k <= 4; ++k) {
src1 += stride; src2 -= stride;
H += k*(src0[k] - src0[-k]);
V += k*(src1[0] - src2[ 0]);
}
for (; k <= 8; ++k) {
src1 += stride; src2 -= stride;
V += k*(src1[0] - src2[0]);
}
H = (17*H+16) >> 5;
V = (5*V+32) >> 6;
a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
for(j=16; j>0; --j) {
int b = a;
a += V;
src[0] = CLIP((b ) >> 5);
src[1] = CLIP((b+ H) >> 5);
src[2] = CLIP((b+2*H) >> 5);
src[3] = CLIP((b+3*H) >> 5);
src[4] = CLIP((b+4*H) >> 5);
src[5] = CLIP((b+5*H) >> 5);
src[6] = CLIP((b+6*H) >> 5);
src[7] = CLIP((b+7*H) >> 5);
src += stride;
}
}
#define SRC(x,y) src[(x)+(y)*stride] #define SRC(x,y) src[(x)+(y)*stride]
#define PL(y) \ #define PL(y) \
const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
......
...@@ -999,11 +999,12 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { ...@@ -999,11 +999,12 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
} }
} }
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth) void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
if (bit_depth == 8) { if (bit_depth == 8) {
c->h264_idct_add = ff_h264_idct_add_altivec; c->h264_idct_add = ff_h264_idct_add_altivec;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_altivec; c->h264_idct_add8 = ff_h264_idct_add8_altivec;
c->h264_idct_add16 = ff_h264_idct_add16_altivec; c->h264_idct_add16 = ff_h264_idct_add16_altivec;
c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
......
...@@ -1343,7 +1343,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) ...@@ -1343,7 +1343,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0) if (MPV_common_init(s) < 0)
return -1; return -1;
ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1);
#if CONFIG_RV30_DECODER #if CONFIG_RV30_DECODER
if (avctx->codec_id == CODEC_ID_RV30) if (avctx->codec_id == CODEC_ID_RV30)
......
...@@ -1769,7 +1769,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) ...@@ -1769,7 +1769,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
avctx->pix_fmt = PIX_FMT_YUV420P; avctx->pix_fmt = PIX_FMT_YUV420P;
dsputil_init(&s->dsp, avctx); dsputil_init(&s->dsp, avctx);
ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8); ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
ff_vp8dsp_init(&s->vp8dsp); ff_vp8dsp_init(&s->vp8dsp);
return 0; return 0;
......
...@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s ...@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s
void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {
#if HAVE_YASM #if HAVE_YASM
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
...@@ -176,13 +176,16 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -176,13 +176,16 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
if (mm_flags & AV_CPU_FLAG_MMX) { if (mm_flags & AV_CPU_FLAG_MMX) {
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
if (chroma_format_idc == 1) {
h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx;
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
}
if (codec_id == CODEC_ID_VP8) { if (codec_id == CODEC_ID_VP8) {
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx;
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx;
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx;
} else { } else {
if (chroma_format_idc == 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
if (codec_id == CODEC_ID_SVQ3) { if (codec_id == CODEC_ID_SVQ3) {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
...@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
if (mm_flags & AV_CPU_FLAG_MMX2) { if (mm_flags & AV_CPU_FLAG_MMX2) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; if (chroma_format_idc == 1)
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext;
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
...@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext;
} }
if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; if (chroma_format_idc == 1) {
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext;
}
} }
if (codec_id == CODEC_ID_VP8) { if (codec_id == CODEC_ID_VP8) {
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext;
...@@ -231,6 +237,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -231,6 +237,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext;
h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext;
} else { } else {
if (chroma_format_idc == 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
if (codec_id == CODEC_ID_SVQ3) { if (codec_id == CODEC_ID_SVQ3) {
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2;
...@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2;
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2;
} else { } else {
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; if (chroma_format_idc == 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2;
if (codec_id == CODEC_ID_SVQ3) { if (codec_id == CODEC_ID_SVQ3) {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
} else if (codec_id == CODEC_ID_RV40) { } else if (codec_id == CODEC_ID_RV40) {
...@@ -271,6 +279,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -271,6 +279,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
if (mm_flags & AV_CPU_FLAG_SSSE3) { if (mm_flags & AV_CPU_FLAG_SSSE3) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
if (chroma_format_idc == 1)
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3;
...@@ -286,6 +295,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -286,6 +295,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3;
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
} else { } else {
if (chroma_format_idc == 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
if (codec_id == CODEC_ID_SVQ3) { if (codec_id == CODEC_ID_SVQ3) {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
...@@ -301,6 +311,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -301,6 +311,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
if (chroma_format_idc == 1)
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext;
h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext;
...@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth ...@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2;
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2;
if (chroma_format_idc == 1) {
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2;
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2;
h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2;
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2;
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2;
}
h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2;
h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2;
......
...@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4, 8, 10) ...@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4, 8, 10)
H264_BIWEIGHT_10_SSE( 4, 4, 10) H264_BIWEIGHT_10_SSE( 4, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 2, 10) H264_BIWEIGHT_10_SSE( 4, 2, 10)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
...@@ -368,6 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -368,6 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
...@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
if (chroma_format_idc == 1) {
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
}
#if ARCH_X86_32 #if ARCH_X86_32
c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
...@@ -413,6 +417,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -413,6 +417,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
...@@ -472,6 +477,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -472,6 +477,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
...@@ -532,6 +538,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -532,6 +538,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
c->h264_idct_add16 = ff_h264_idct_add16_10_avx; c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_10_avx; c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment