Commit f0ecbb13 authored by Martin Storsjö

arm/aarch64: vp9lpf: Calculate !hev directly

Previously we first calculated hev, and then negated it.

Since we were able to schedule the negation in the middle
of another calculation, we don't see a gain in every case.

Before:                     Cortex A7      A8      A9     A53  A53/AArch64
vp9_loop_filter_v_4_8_neon:     147.0   129.0   115.8    89.0         88.7
vp9_loop_filter_v_8_8_neon:     242.0   198.5   174.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    500.0   419.5   382.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   971.2   825.5   731.5   579.0        453.0
After:
vp9_loop_filter_v_4_8_neon:     143.0   127.7   114.8    88.0         87.7
vp9_loop_filter_v_8_8_neon:     241.0   197.2   173.7   140.0        136.7
vp9_loop_filter_v_16_8_neon:    497.0   419.5   379.7   293.0        275.7
vp9_loop_filter_v_16_16_neon:   965.2   818.7   731.4   579.0        452.0

This is cherry-picked from libav commit
e1f9de86.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent 148cc0bb
...@@ -292,7 +292,7 @@ ...@@ -292,7 +292,7 @@
.if \mix != 0 .if \mix != 0
sxtl v1.8h, v1.8b sxtl v1.8h, v1.8b
.endif .endif
cmhi v5\sz, v5\sz, v3\sz // hev cmhs v5\sz, v3\sz, v5\sz // !hev
.if \wd == 8 .if \wd == 8
// If a 4/8 or 8/4 mix is used, clear the relevant half of v6 // If a 4/8 or 8/4 mix is used, clear the relevant half of v6
.if \mix != 0 .if \mix != 0
...@@ -306,11 +306,10 @@ ...@@ -306,11 +306,10 @@
.elseif \wd == 8 .elseif \wd == 8
bic v4\sz, v4\sz, v6\sz // fm && !flat8in bic v4\sz, v4\sz, v6\sz // fm && !flat8in
.endif .endif
mvn v5\sz, v5\sz // !hev and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
.if \wd == 16 .if \wd == 16
and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm and v7\sz, v7\sz, v6\sz // flat8out && flat8in && fm
.endif .endif
and v5\sz, v5\sz, v4\sz // !hev && fm && !flat8in
mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0) mul_sz \tmp3\().8h, \tmp4\().8h, \tmp3\().8h, \tmp4\().8h, \tmp5\().8h, \tmp5\().8h, \sz // 3 * (q0 - p0)
bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0 bic \tmp1\sz, \tmp1\sz, v5\sz // if (!hev) av_clip_int8 = 0
......
...@@ -141,7 +141,7 @@ ...@@ -141,7 +141,7 @@
.if \wd == 8 .if \wd == 8
vcle.u8 d6, d6, d0 @ flat8in vcle.u8 d6, d6, d0 @ flat8in
.endif .endif
vcgt.u8 d5, d5, d3 @ hev vcle.u8 d5, d5, d3 @ !hev
.if \wd == 8 .if \wd == 8
vand d6, d6, d4 @ flat8in && fm vand d6, d6, d4 @ flat8in && fm
.endif .endif
...@@ -151,11 +151,10 @@ ...@@ -151,11 +151,10 @@
.elseif \wd == 8 .elseif \wd == 8
vbic d4, d4, d6 @ fm && !flat8in vbic d4, d4, d6 @ fm && !flat8in
.endif .endif
vmvn d5, d5 @ !hev vand d5, d5, d4 @ !hev && fm && !flat8in
.if \wd == 16 .if \wd == 16
vand d7, d7, d6 @ flat8out && flat8in && fm vand d7, d7, d6 @ flat8out && flat8in && fm
.endif .endif
vand d5, d5, d4 @ !hev && fm && !flat8in
vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0) vmul.s16 \tmpq2, \tmpq2, \tmpq3 @ 3 * (q0 - p0)
vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0 vbic \tmp1, \tmp1, d5 @ if (!hev) av_clip_int8 = 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment