Commit 1eeca886 authored by Jason Garrett-Glaser

VP8: optimize VP8Context struct ordering

Shaves at least 3KB off code size on x86, should improve cache utilization.
This would probably be useful to do for other decoders/encoders as well.
parent 3efbe137
...@@ -474,7 +474,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) ...@@ -474,7 +474,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
int idx = CNT_ZERO; int idx = CNT_ZERO;
int cur_sign_bias = s->sign_bias[mb->ref_frame]; int cur_sign_bias = s->sign_bias[mb->ref_frame];
int *sign_bias = s->sign_bias; int8_t *sign_bias = s->sign_bias;
VP56mv near_mv[4]; VP56mv near_mv[4];
uint8_t cnt[4] = { 0 }; uint8_t cnt[4] = { 0 };
VP56RangeCoder *c = &s->c; VP56RangeCoder *c = &s->c;
......
...@@ -85,83 +85,24 @@ typedef struct { ...@@ -85,83 +85,24 @@ typedef struct {
typedef struct { typedef struct {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp;
VP8DSPContext vp8dsp;
H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[4];
AVFrame *framep[4]; AVFrame *framep[4];
uint8_t *edge_emu_buffer; uint8_t *edge_emu_buffer;
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
int profile;
int mb_width; /* number of horizontal MB */ uint16_t mb_width; /* number of horizontal MB */
int mb_height; /* number of vertical MB */ uint16_t mb_height; /* number of vertical MB */
int linesize; int linesize;
int uvlinesize; int uvlinesize;
int keyframe; uint8_t keyframe;
int invisible; uint8_t deblock_filter;
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one uint8_t mbskip_enabled;
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so uint8_t segment; ///< segment of the current macroblock
int update_altref; uint8_t chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
int deblock_filter; uint8_t profile;
/**
* If this flag is not set, all the probability updates
* are discarded after this frame is decoded.
*/
int update_probabilities;
/**
* All coefficients are contained in separate arith coding contexts.
* There can be 1, 2, 4, or 8 of these after the header context.
*/
int num_coeff_partitions;
VP56RangeCoder coeff_partition[8];
VP8Macroblock *macroblocks;
VP8Macroblock *macroblocks_base;
VP8FilterStrength *filter_strength;
uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/**
* Cache of the top row needed for intra prediction
* 16 for luma, 8 for each chroma plane
*/
uint8_t (*top_border)[16+8+8];
/**
* For coeff decode, we need to know whether the above block had non-zero
* coefficients. This means for each macroblock, we need data for 4 luma
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
* per macroblock. We keep the last row in top_nnz.
*/
uint8_t (*top_nnz)[9];
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
/**
* This is the index plus one of the last non-zero coeff
* for each of the blocks in the current macroblock.
* So, 0 -> no coeffs
* 1 -> dc-only (special transform)
* 2+-> full transform
*/
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
uint8_t intra4x4_pred_mode_mb[16];
int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
int segment; ///< segment of the current macroblock
VP56mv mv_min; VP56mv mv_min;
VP56mv mv_max; VP56mv mv_max;
int mbskip_enabled; int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
int sign_bias[4]; ///< one state [0, 1] per ref frame type
int ref_count[3]; int ref_count[3];
/** /**
...@@ -170,13 +111,26 @@ typedef struct { ...@@ -170,13 +111,26 @@ typedef struct {
* a frame, since the values persist between interframes. * a frame, since the values persist between interframes.
*/ */
struct { struct {
int enabled; uint8_t enabled;
int absolute_vals; uint8_t absolute_vals;
int update_map; uint8_t update_map;
int8_t base_quant[4]; int8_t base_quant[4];
int8_t filter_level[4]; ///< base loop filter level int8_t filter_level[4]; ///< base loop filter level
} segmentation; } segmentation;
struct {
uint8_t simple;
uint8_t level;
uint8_t sharpness;
} filter;
VP8Macroblock *macroblocks;
VP8FilterStrength *filter_strength;
uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/** /**
* Macroblocks can have one of 4 different quants in a frame when * Macroblocks can have one of 4 different quants in a frame when
* segmentation is enabled. * segmentation is enabled.
...@@ -190,13 +144,7 @@ typedef struct { ...@@ -190,13 +144,7 @@ typedef struct {
} qmat[4]; } qmat[4];
struct { struct {
int simple; uint8_t enabled; ///< whether each mb can have a different strength based on mode/ref
int level;
int sharpness;
} filter;
struct {
int enabled; ///< whether each mb can have a different strength based on mode/ref
/** /**
* filter strength adjustment for the following macroblock modes: * filter strength adjustment for the following macroblock modes:
...@@ -219,6 +167,34 @@ typedef struct { ...@@ -219,6 +167,34 @@ typedef struct {
int8_t ref[4]; int8_t ref[4];
} lf_delta; } lf_delta;
/**
* Cache of the top row needed for intra prediction
* 16 for luma, 8 for each chroma plane
*/
uint8_t (*top_border)[16+8+8];
/**
* For coeff decode, we need to know whether the above block had non-zero
* coefficients. This means for each macroblock, we need data for 4 luma
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
* per macroblock. We keep the last row in top_nnz.
*/
uint8_t (*top_nnz)[9];
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
/**
* This is the index plus one of the last non-zero coeff
* for each of the blocks in the current macroblock.
* So, 0 -> no coeffs
* 1 -> dc-only (special transform)
* 2+-> full transform
*/
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
uint8_t intra4x4_pred_mode_mb[16];
/** /**
* These are all of the updatable probabilities for binary decisions. * These are all of the updatable probabilities for binary decisions.
* They are only implicitly reset on keyframes, making it quite likely * They are only implicitly reset on keyframes, making it quite likely
...@@ -236,6 +212,30 @@ typedef struct { ...@@ -236,6 +212,30 @@ typedef struct {
uint8_t token[4][16][3][NUM_DCT_TOKENS-1]; uint8_t token[4][16][3][NUM_DCT_TOKENS-1];
uint8_t mvc[2][19]; uint8_t mvc[2][19];
} prob[2]; } prob[2];
VP8Macroblock *macroblocks_base;
int invisible;
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
int update_altref;
/**
* If this flag is not set, all the probability updates
* are discarded after this frame is decoded.
*/
int update_probabilities;
/**
* All coefficients are contained in separate arith coding contexts.
* There can be 1, 2, 4, or 8 of these after the header context.
*/
int num_coeff_partitions;
VP56RangeCoder coeff_partition[8];
DSPContext dsp;
VP8DSPContext vp8dsp;
H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[4];
} VP8Context; } VP8Context;
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment