Commit 7a4a5515 authored by Christophe Gisquet's avatar Christophe Gisquet Committed by Michael Niedermayer

hevc: use intreadwrite

When dealing with MVs, both components may be processed at a time.

On Win64, 560 to 539 cycles for derive_spatial_merge_candidates.
Signed-off-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parent 8da1defe
...@@ -1712,8 +1712,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ...@@ -1712,8 +1712,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
} }
if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) { if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
lc->pu.mvd.x = 0; AV_ZERO32(&lc->pu.mvd);
lc->pu.mvd.y = 0;
} else { } else {
ff_hevc_hls_mvd_coding(s, x0, y0, 1); ff_hevc_hls_mvd_coding(s, x0, y0, 1);
} }
......
...@@ -661,7 +661,7 @@ typedef struct Mv { ...@@ -661,7 +661,7 @@ typedef struct Mv {
} Mv; } Mv;
typedef struct MvField { typedef struct MvField {
Mv mv[2]; DECLARE_ALIGNED(4, Mv, mv)[2];
int8_t ref_idx[2]; int8_t ref_idx[2];
int8_t pred_flag; int8_t pred_flag;
} MvField; } MvField;
......
...@@ -125,6 +125,7 @@ static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP) ...@@ -125,6 +125,7 @@ static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP)
yN >> plevel == yP >> plevel; yN >> plevel == yP >> plevel;
} }
#define MATCH_MV(x) (AV_RN32A(&A.x) == AV_RN32A(&B.x))
#define MATCH(x) (A.x == B.x) #define MATCH(x) (A.x == B.x)
// check if the mv's and refidx are the same between A and B // check if the mv's and refidx are the same between A and B
...@@ -134,12 +135,12 @@ static int compareMVrefidx(struct MvField A, struct MvField B) ...@@ -134,12 +135,12 @@ static int compareMVrefidx(struct MvField A, struct MvField B)
int b_pf = B.pred_flag; int b_pf = B.pred_flag;
if (a_pf == b_pf) { if (a_pf == b_pf) {
if (a_pf == PF_BI) { if (a_pf == PF_BI) {
return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y) && return MATCH(ref_idx[0]) && MATCH_MV(mv[0]) &&
MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
} else if (a_pf == PF_L0) { } else if (a_pf == PF_L0) {
return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y); return MATCH(ref_idx[0]) && MATCH_MV(mv[0]);
} else if (a_pf == PF_L1) { } else if (a_pf == PF_L1) {
return MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); return MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
} }
} }
return 0; return 0;
...@@ -505,15 +506,12 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, ...@@ -505,15 +506,12 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) && if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) &&
(refPicList[0].list[l0_cand.ref_idx[0]] != (refPicList[0].list[l0_cand.ref_idx[0]] !=
refPicList[1].list[l1_cand.ref_idx[1]] || refPicList[1].list[l1_cand.ref_idx[1]] ||
l0_cand.mv[0].x != l1_cand.mv[1].x || AV_RN32A(&l0_cand.mv[0]) != AV_RN32A(&l1_cand.mv[1]))) {
l0_cand.mv[0].y != l1_cand.mv[1].y)) {
mergecandlist[nb_merge_cand].ref_idx[0] = l0_cand.ref_idx[0]; mergecandlist[nb_merge_cand].ref_idx[0] = l0_cand.ref_idx[0];
mergecandlist[nb_merge_cand].ref_idx[1] = l1_cand.ref_idx[1]; mergecandlist[nb_merge_cand].ref_idx[1] = l1_cand.ref_idx[1];
mergecandlist[nb_merge_cand].pred_flag = PF_BI; mergecandlist[nb_merge_cand].pred_flag = PF_BI;
mergecandlist[nb_merge_cand].mv[0].x = l0_cand.mv[0].x; AV_COPY32(&mergecandlist[nb_merge_cand].mv[0], &l0_cand.mv[0]);
mergecandlist[nb_merge_cand].mv[0].y = l0_cand.mv[0].y; AV_COPY32(&mergecandlist[nb_merge_cand].mv[1], &l1_cand.mv[1]);
mergecandlist[nb_merge_cand].mv[1].x = l1_cand.mv[1].x;
mergecandlist[nb_merge_cand].mv[1].y = l1_cand.mv[1].y;
if (merge_idx == nb_merge_cand) return; if (merge_idx == nb_merge_cand) return;
nb_merge_cand++; nb_merge_cand++;
} }
...@@ -523,10 +521,8 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, ...@@ -523,10 +521,8 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
// append Zero motion vector candidates // append Zero motion vector candidates
while (nb_merge_cand < s->sh.max_num_merge_cand) { while (nb_merge_cand < s->sh.max_num_merge_cand) {
mergecandlist[nb_merge_cand].pred_flag = PF_L0 + ((s->sh.slice_type == B_SLICE) << 1); mergecandlist[nb_merge_cand].pred_flag = PF_L0 + ((s->sh.slice_type == B_SLICE) << 1);
mergecandlist[nb_merge_cand].mv[0].x = 0; AV_ZERO32(mergecandlist[nb_merge_cand].mv+0);
mergecandlist[nb_merge_cand].mv[0].y = 0; AV_ZERO32(mergecandlist[nb_merge_cand].mv+1);
mergecandlist[nb_merge_cand].mv[1].x = 0;
mergecandlist[nb_merge_cand].mv[1].y = 0;
mergecandlist[nb_merge_cand].ref_idx[0] = zero_idx < nb_refs ? zero_idx : 0; mergecandlist[nb_merge_cand].ref_idx[0] = zero_idx < nb_refs ? zero_idx : 0;
mergecandlist[nb_merge_cand].ref_idx[1] = zero_idx < nb_refs ? zero_idx : 0; mergecandlist[nb_merge_cand].ref_idx[1] = zero_idx < nb_refs ? zero_idx : 0;
...@@ -545,7 +541,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW, ...@@ -545,7 +541,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW,
{ {
int singleMCLFlag = 0; int singleMCLFlag = 0;
int nCS = 1 << log2_cb_size; int nCS = 1 << log2_cb_size;
struct MvField mergecand_list[MRG_MAX_NUM_CANDS] = { { { { 0 } } } }; LOCAL_ALIGNED(4, MvField, mergecand_list, [MRG_MAX_NUM_CANDS]);
int nPbW2 = nPbW; int nPbW2 = nPbW;
int nPbH2 = nPbH; int nPbH2 = nPbH;
HEVCLocalContext *lc = s->HEVClc; HEVCLocalContext *lc = s->HEVClc;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment