Commit b6e17112, authored by Ronald S. Bultje, committed by Michael Niedermayer

vp9/x86: invert hu_ipred left array ordering.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent e67496fe
...@@ -2347,6 +2347,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2347,6 +2347,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
uint8_t needs_top:1; uint8_t needs_top:1;
uint8_t needs_topleft:1; uint8_t needs_topleft:1;
uint8_t needs_topright:1; uint8_t needs_topright:1;
uint8_t invert_left:1;
} edges[N_INTRA_PRED_MODES] = { } edges[N_INTRA_PRED_MODES] = {
[VERT_PRED] = { .needs_top = 1 }, [VERT_PRED] = { .needs_top = 1 },
[HOR_PRED] = { .needs_left = 1 }, [HOR_PRED] = { .needs_left = 1 },
...@@ -2356,7 +2357,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2356,7 +2357,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
[VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 }, [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
[HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 }, [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
[VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 }, [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
[HOR_UP_PRED] = { .needs_left = 1 }, [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
[TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 }, [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
[LEFT_DC_PRED] = { .needs_left = 1 }, [LEFT_DC_PRED] = { .needs_left = 1 },
[TOP_DC_PRED] = { .needs_top = 1 }, [TOP_DC_PRED] = { .needs_top = 1 },
...@@ -2429,13 +2430,24 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** ...@@ -2429,13 +2430,24 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
uint8_t *dst = x == 0 ? dst_edge : dst_inner; uint8_t *dst = x == 0 ? dst_edge : dst_inner;
ptrdiff_t stride = x == 0 ? stride_edge : stride_inner; ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
if (n_px_need <= n_px_have) { if (edges[mode].invert_left) {
for (i = 0; i < n_px_need; i++) if (n_px_need <= n_px_have) {
l[n_px_need - 1 - i] = dst[i * stride - 1]; for (i = 0; i < n_px_need; i++)
l[i] = dst[i * stride - 1];
} else {
for (i = 0; i < n_px_have; i++)
l[i] = dst[i * stride - 1];
memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
}
} else { } else {
for (i = 0; i < n_px_have; i++) if (n_px_need <= n_px_have) {
l[n_px_need - 1 - i] = dst[i * stride - 1]; for (i = 0; i < n_px_need; i++)
memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have); l[n_px_need - 1 - i] = dst[i * stride - 1];
} else {
for (i = 0; i < n_px_have; i++)
l[n_px_need - 1 - i] = dst[i * stride - 1];
memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
}
} }
} else { } else {
memset(l, 129, 4 << tx); memset(l, 129, 4 << tx);
......
...@@ -786,7 +786,7 @@ def_vert_left(32) ...@@ -786,7 +786,7 @@ def_vert_left(32)
static void hor_up_4x4_c(uint8_t *dst, ptrdiff_t stride, static void hor_up_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *top) const uint8_t *left, const uint8_t *top)
{ {
int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0]; int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
DST(0,0) = (l0 + l1 + 1) >> 1; DST(0,0) = (l0 + l1 + 1) >> 1;
DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2; DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
...@@ -805,17 +805,17 @@ static void hor_up_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \ ...@@ -805,17 +805,17 @@ static void hor_up_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
uint8_t v[size*2 - 2]; \ uint8_t v[size*2 - 2]; \
\ \
for (i = 0; i < size - 2; i++) { \ for (i = 0; i < size - 2; i++) { \
v[i*2 ] = (left[size - i - 1] + left[size - i - 2] + 1) >> 1; \ v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \
v[i*2 + 1] = (left[size - i - 1] + left[size - i - 2] * 2 + left[size - i - 3] + 2) >> 2; \ v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
} \ } \
v[size*2 - 4] = (left[1] + left[0] + 1) >> 1; \ v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
v[size*2 - 3] = (left[1] + left[0] * 3 + 2) >> 2; \ v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
\ \
for (j = 0; j < size / 2; j++) \ for (j = 0; j < size / 2; j++) \
memcpy(dst + j*stride, v + j*2, size); \ memcpy(dst + j*stride, v + j*2, size); \
for (j = size / 2; j < size; j++) { \ for (j = size / 2; j < size; j++) { \
memcpy(dst + j*stride, v + j*2, size*2 - 2 - j*2); \ memcpy(dst + j*stride, v + j*2, size*2 - 2 - j*2); \
memset(dst + j*stride + size*2 - 2 - j*2, left[0], \ memset(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
2 + j*2 - size); \ 2 + j*2 - size); \
} \ } \
} }
......
...@@ -63,13 +63,11 @@ pb_6xm1_246_8toE: times 6 db -1 ...@@ -63,13 +63,11 @@ pb_6xm1_246_8toE: times 6 db -1
pb_6xm1_BDF_0to6: times 6 db -1 pb_6xm1_BDF_0to6: times 6 db -1
db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6 db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
pb_7to1_9x0: db 7, 6, 5, 4
pb_3to1_5x0: db 3, 2, 1
times 9 db 0
pb_Fto0: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
pb_2: times 32 db 2 pb_2: times 32 db 2
pb_15: times 16 db 15 pb_15: times 16 db 15
pb_0to2_5x3: db 0, 1, 2
times 5 db 3
cextern pb_1 cextern pb_1
cextern pb_3 cextern pb_3
...@@ -1420,7 +1418,7 @@ HD_XMM_FUNCS avx ...@@ -1420,7 +1418,7 @@ HD_XMM_FUNCS avx
INIT_MMX ssse3 INIT_MMX ssse3
cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
movd m0, [lq] movd m0, [lq]
pshufb m0, [pb_3to1_5x0] pshufb m0, [pb_0to2_5x3]
psrlq m1, m0, 8 psrlq m1, m0, 8
psrlq m2, m1, 8 psrlq m2, m1, 8
LOWPASS 2, 1, 0, 3 LOWPASS 2, 1, 0, 3
...@@ -1441,7 +1439,7 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l ...@@ -1441,7 +1439,7 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
INIT_XMM %1 INIT_XMM %1
cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l
movq m0, [lq] movq m0, [lq]
pshufb m0, [pb_7to1_9x0] pshufb m0, [pb_0to6_9x7]
psrldq m1, m0, 1 psrldq m1, m0, 1
psrldq m2, m1, 1 psrldq m2, m1, 1
LOWPASS 2, 1, 0, 3 LOWPASS 2, 1, 0, 3
...@@ -1466,7 +1464,6 @@ cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l ...@@ -1466,7 +1464,6 @@ cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l
INIT_XMM %1 INIT_XMM %1
cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l
mova m0, [lq] mova m0, [lq]
pshufb m0, [pb_Fto0]
mova m3, [pb_2toE_3xF] mova m3, [pb_2toE_3xF]
pshufb m1, m0, [pb_1toE_2xF] pshufb m1, m0, [pb_1toE_2xF]
pshufb m2, m0, m3 pshufb m2, m0, m3
...@@ -1494,12 +1491,9 @@ cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l ...@@ -1494,12 +1491,9 @@ cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l
INIT_XMM %1 INIT_XMM %1
cglobal vp9_ipred_hu_32x32, 3, 7, 7, dst, stride, l cglobal vp9_ipred_hu_32x32, 3, 7, 7, dst, stride, l
mova m0, [lq] mova m1, [lq]
mova m1, [lq+16] mova m0, [lq+16]
mova m2, [pb_Fto0]
mova m4, [pb_2toE_3xF] mova m4, [pb_2toE_3xF]
pshufb m0, m2
pshufb m1, m2
palignr m2, m0, m1, 1 palignr m2, m0, m1, 1
palignr m3, m0, m1, 2 palignr m3, m0, m1, 2
LOWPASS 3, 2, 1, 5 LOWPASS 3, 2, 1, 5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment