Commit 7d2e03af authored by Ronald S. Bultje's avatar Ronald S. Bultje

vc1: make overlap filter for I-frames bit-exact.

parent 5c9f147e
......@@ -317,6 +317,8 @@ typedef struct VC1Context{
int bi_type;
int x8_type;
DCTELEM (*block)[6][64];
int n_allocated_blks, cur_blk_idx, left_blk_idx, topleft_blk_idx, top_blk_idx;
uint32_t *cbp_base, *cbp;
uint8_t *is_intra_base, *is_intra;
int16_t (*luma_mv_base)[2], (*luma_mv)[2];
......
......@@ -160,6 +160,72 @@ enum Imode {
/** @} */ //Bitplane group
static void vc1_put_signed_blocks_clamped(VC1Context *v)
{
MpegEncContext *s = &v->s;
/* The put pixels loop is always one MB row behind the decoding loop,
* because we can only put pixels when overlap filtering is done, and
* for filtering of the bottom edge of a MB, we need the next MB row
* present as well.
* Within the row, the put pixels loop is also one MB col behind the
* decoding loop. The reason for this is again, because for filtering
* of the right MB edge, we need the next MB present. */
if (!s->first_slice_line) {
if (s->mb_x) {
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
s->dest[0] - 16 * s->linesize - 16,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
s->dest[0] - 16 * s->linesize - 8,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
s->dest[0] - 8 * s->linesize - 16,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
s->dest[0] - 8 * s->linesize - 8,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
s->dest[1] - 8 * s->uvlinesize - 8,
s->uvlinesize);
s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
s->dest[2] - 8 * s->uvlinesize - 8,
s->uvlinesize);
}
if (s->mb_x == s->mb_width - 1) {
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
s->dest[0] - 16 * s->linesize,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
s->dest[0] - 16 * s->linesize + 8,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
s->dest[0] - 8 * s->linesize,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
s->dest[0] - 8 * s->linesize + 8,
s->linesize);
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
s->dest[1] - 8 * s->uvlinesize,
s->uvlinesize);
s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
s->dest[2] - 8 * s->uvlinesize,
s->uvlinesize);
}
}
#define inc_blk_idx(idx) do { \
idx++; \
if (idx >= v->n_allocated_blks) \
idx = 0; \
} while (0)
inc_blk_idx(v->topleft_blk_idx);
inc_blk_idx(v->top_blk_idx);
inc_blk_idx(v->left_blk_idx);
inc_blk_idx(v->cur_blk_idx);
}
static void vc1_loop_filter_iblk(VC1Context *v, int pq)
{
MpegEncContext *s = &v->s;
......@@ -187,6 +253,151 @@ static void vc1_loop_filter_iblk(VC1Context *v, int pq)
}
}
static void vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
{
MpegEncContext *s = &v->s;
int j;
/* The loopfilter runs 1 row and 1 column behind the overlap filter, which
* means it runs two rows/cols behind the decoding loop. */
if (!s->first_slice_line) {
if (s->mb_x) {
if (s->mb_y >= s->start_mb_y + 2) {
v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
if (s->mb_x >= 2)
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
for(j = 0; j < 2; j++) {
v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
if (s->mb_x >= 2) {
v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
}
}
}
v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
}
if (s->mb_x == s->mb_width - 1) {
if (s->mb_y >= s->start_mb_y + 2) {
v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
if (s->mb_x)
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
for(j = 0; j < 2; j++) {
v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize, s->uvlinesize, pq);
if (s->mb_x >= 2) {
v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 16 * s->uvlinesize, s->uvlinesize, pq);
}
}
}
v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
}
if (s->mb_y == s->mb_height) {
if (s->mb_x) {
if (s->mb_x >= 2)
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
if (s->mb_x >= 2) {
for(j = 0; j < 2; j++) {
v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
}
}
}
if (s->mb_x == s->mb_width - 1) {
if (s->mb_x)
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
if (s->mb_x) {
for(j = 0; j < 2; j++) {
v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize, s->uvlinesize, pq);
}
}
}
}
}
}
static void vc1_smooth_overlap_filter_iblk(VC1Context *v)
{
MpegEncContext *s = &v->s;
int mb_pos;
if (v->condover == CONDOVER_NONE)
return;
mb_pos = s->mb_x + s->mb_y * s->mb_stride;
/* Within a MB, the horizontal overlap always runs before the vertical.
* To accomplish that, we run the H on left and internal borders of the
* currently decoded MB. Then, we wait for the next overlap iteration
* to do H overlap on the right edge of this MB, before moving over and
* running the V overlap. Therefore, the V overlap makes us trail by one
* MB col and the H overlap filter makes us trail by one MB row. This
* is reflected in the time at which we run the put_pixels loop. */
if(v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
if(s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
v->over_flags_plane[mb_pos - 1])) {
v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
v->block[v->cur_blk_idx][0]);
v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
v->block[v->cur_blk_idx][2]);
if(!(s->flags & CODEC_FLAG_GRAY)) {
v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
v->block[v->cur_blk_idx][4]);
v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
v->block[v->cur_blk_idx][5]);
}
}
v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
v->block[v->cur_blk_idx][1]);
v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
v->block[v->cur_blk_idx][3]);
if (s->mb_x == s->mb_width - 1) {
if(!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
v->over_flags_plane[mb_pos - s->mb_stride])) {
v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
v->block[v->cur_blk_idx][0]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
v->block[v->cur_blk_idx][1]);
if(!(s->flags & CODEC_FLAG_GRAY)) {
v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
v->block[v->cur_blk_idx][4]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
v->block[v->cur_blk_idx][5]);
}
}
v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
v->block[v->cur_blk_idx][2]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
v->block[v->cur_blk_idx][3]);
}
}
if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
if(!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
v->block[v->left_blk_idx][0]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
v->block[v->left_blk_idx][1]);
if(!(s->flags & CODEC_FLAG_GRAY)) {
v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
v->block[v->left_blk_idx][4]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
v->block[v->left_blk_idx][5]);
}
}
v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
v->block[v->left_blk_idx][2]);
v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
v->block[v->left_blk_idx][3]);
}
}
/** Do motion compensation over 1 macroblock
* Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
*/
......@@ -2773,7 +2984,6 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
int mb_pos;
int mquant = v->pq;
int mqdiff;
int overlap;
GetBitContext *gb = &s->gb;
/* select codingmode used for VLC tables selection */
......@@ -2816,15 +3026,9 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
s->mb_x = 0;
ff_init_block_index(s);
for(;s->mb_x < s->mb_width; s->mb_x++) {
uint8_t *dst[6];
DCTELEM (*block)[64] = v->block[v->cur_blk_idx];
ff_update_block_index(s);
dst[0] = s->dest[0];
dst[1] = dst[0] + 8;
dst[2] = s->dest[0] + s->linesize * 8;
dst[3] = dst[2] + 8;
dst[4] = s->dest[1];
dst[5] = s->dest[2];
s->dsp.clear_blocks(s->block[0]);
s->dsp.clear_blocks(block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
......@@ -2837,13 +3041,8 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
else
v->s.ac_pred = v->acpred_plane[mb_pos];
if(v->condover == CONDOVER_SELECT) {
if(v->overflg_is_raw)
overlap = get_bits1(&v->s.gb);
else
overlap = v->over_flags_plane[mb_pos];
} else
overlap = (v->condover == CONDOVER_ALL);
if (v->condover == CONDOVER_SELECT && v->overflg_is_raw)
v->over_flags_plane[mb_pos] = get_bits1(&v->s.gb);
GET_MQUANT();
......@@ -2865,37 +3064,15 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
v->a_avail = !s->first_slice_line || (k==2 || k==3);
v->c_avail = !!s->mb_x || (k==1 || k==3);
vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
vc1_decode_i_block_adv(v, block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
s->dsp.put_signed_pixels_clamped(s->block[k], dst[k],
k & 4 ? s->uvlinesize : s->linesize);
v->vc1dsp.vc1_inv_trans_8x8(block[k]);
}
if(overlap) {
if(s->mb_x) {
v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
if(!(s->flags & CODEC_FLAG_GRAY)) {
v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
}
}
v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
if(!s->first_slice_line) {
v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
if(!(s->flags & CODEC_FLAG_GRAY)) {
v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
}
}
v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
}
if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
vc1_smooth_overlap_filter_iblk(v);
vc1_put_signed_blocks_clamped(v);
if(v->s.loop_filter) vc1_loop_filter_iblk_delayed(v, v->pq);
if(get_bits_count(&s->gb) > v->bits) {
ff_er_add_slice(s, 0, s->start_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
......@@ -2909,6 +3086,15 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
ff_draw_horiz_band(s, (s->mb_y-1) * 16, 16);
s->first_slice_line = 0;
}
/* raw bottom MB row */
s->mb_x = 0;
ff_init_block_index(s);
for(;s->mb_x < s->mb_width; s->mb_x++) {
ff_update_block_index(s);
vc1_put_signed_blocks_clamped(v);
if(v->s.loop_filter) vc1_loop_filter_iblk_delayed(v, v->pq);
}
if (v->s.loop_filter)
ff_draw_horiz_band(s, (s->mb_height-1)*16, 16);
ff_er_add_slice(s, 0, s->start_mb_y, s->mb_width - 1, s->end_mb_y - 1, (AC_END|DC_END|MV_END));
......@@ -3063,6 +3249,10 @@ static void vc1_decode_blocks(VC1Context *v)
if(v->x8_type){
ff_intrax8_decode_picture(&v->x8, 2*v->pq+v->halfpq, v->pq*(!v->pquantizer) );
}else{
v->cur_blk_idx = 0;
v->left_blk_idx = -1;
v->topleft_blk_idx = 1;
v->top_blk_idx = 2;
switch(v->s.pict_type) {
case AV_PICTURE_TYPE_I:
if(v->profile == PROFILE_ADVANCED)
......@@ -3333,6 +3523,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
v->acpred_plane = av_malloc(s->mb_stride * s->mb_height);
v->over_flags_plane = av_malloc(s->mb_stride * s->mb_height);
v->n_allocated_blks = s->mb_width + 2;
v->block = av_malloc(sizeof(*v->block) * v->n_allocated_blks);
v->cbp_base = av_malloc(sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
v->cbp = v->cbp_base + s->mb_stride;
v->ttblk_base = av_malloc(sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
......@@ -3598,6 +3790,7 @@ static av_cold int vc1_decode_end(AVCodecContext *avctx)
av_freep(&v->acpred_plane);
av_freep(&v->over_flags_plane);
av_freep(&v->mb_type_base);
av_freep(&v->block);
av_freep(&v->cbp_base);
av_freep(&v->ttblk_base);
av_freep(&v->is_intra_base); // FIXME use v->mb_type[]
......
......@@ -78,6 +78,58 @@ static void vc1_h_overlap_c(uint8_t* src, int stride)
}
}
static void vc1_v_s_overlap_c(DCTELEM *top, DCTELEM *bottom)
{
int i;
int a, b, c, d;
int d1, d2;
int rnd1 = 4, rnd2 = 3;
for(i = 0; i < 8; i++) {
a = top[48];
b = top[56];
c = bottom[0];
d = bottom[8];
d1 = a - d;
d2 = a - d + b - c;
top[48] = ((a << 3) - d1 + rnd1) >> 3;
top[56] = ((b << 3) - d2 + rnd2) >> 3;
bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
bottom[8] = ((d << 3) + d1 + rnd2) >> 3;
bottom++;
top++;
rnd2 = 7 - rnd2;
rnd1 = 7 - rnd1;
}
}
static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
{
int i;
int a, b, c, d;
int d1, d2;
int rnd1 = 4, rnd2 = 3;
for(i = 0; i < 8; i++) {
a = left[6];
b = left[7];
c = right[0];
d = right[1];
d1 = a - d;
d2 = a - d + b - c;
left[6] = ((a << 3) - d1 + rnd1) >> 3;
left[7] = ((b << 3) - d2 + rnd2) >> 3;
right[0] = ((c << 3) + d2 + rnd1) >> 3;
right[1] = ((d << 3) + d1 + rnd2) >> 3;
right += 8;
left += 8;
rnd2 = 7 - rnd2;
rnd1 = 7 - rnd1;
}
}
/**
* VC-1 in-loop deblocking filter for one line
* @param src source block type
......@@ -672,6 +724,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
dsp->vc1_h_overlap = vc1_h_overlap_c;
dsp->vc1_v_overlap = vc1_v_overlap_c;
dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;
dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c;
dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c;
dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c;
......
......@@ -40,8 +40,10 @@ typedef struct VC1DSPContext {
void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_v_overlap)(uint8_t* src, int stride);
void (*vc1_h_overlap)(uint8_t* src, int stride);
void (*vc1_v_overlap)(uint8_t *src, int stride);
void (*vc1_h_overlap)(uint8_t *src, int stride);
void (*vc1_v_s_overlap)(DCTELEM *top, DCTELEM *bottom);
void (*vc1_h_s_overlap)(DCTELEM *left, DCTELEM *right);
void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
......
0, 0, 38016, 0xf4715db5
0, 3600, 38016, 0xf4715db5
0, 7200, 38016, 0xf4715db5
0, 10800, 38016, 0xf46af0e1
0, 14400, 38016, 0x9c1c2cf1
0, 18000, 38016, 0xff12d87f
0, 21600, 38016, 0x7408432b
0, 25200, 38016, 0x7408432b
0, 28800, 38016, 0x8d11479a
0, 32400, 38016, 0x8d11479a
0, 36000, 38016, 0xc4a121ab
0, 39600, 38016, 0xc4a121ab
0, 43200, 38016, 0xc4a121ab
0, 46800, 38016, 0xc4a121ab
0, 50400, 38016, 0xc4a121ab
0, 0, 38016, 0xa6f15db5
0, 3600, 38016, 0xa6f15db5
0, 7200, 38016, 0xa6f15db5
0, 10800, 38016, 0x5c4ef0e7
0, 14400, 38016, 0x53a42d1d
0, 18000, 38016, 0x68f7d89e
0, 21600, 38016, 0xc15f4368
0, 25200, 38016, 0xc15f4368
0, 28800, 38016, 0xd1bd47a8
0, 32400, 38016, 0xd1bd47a8
0, 36000, 38016, 0xe1e821ca
0, 39600, 38016, 0xe1e821ca
0, 43200, 38016, 0xe1e821ca
0, 46800, 38016, 0xe1e821ca
0, 50400, 38016, 0xe1e821ca
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment