Commit 869efbf9 authored by Martin Vignali

avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)

parent 713f9c5b
...@@ -25,16 +25,18 @@ ...@@ -25,16 +25,18 @@
SECTION_RODATA SECTION_RODATA
pb_128: times 16 db 128 pb_128: times 16 db 128
pb_128_0 : times 8 db 0, 128
SECTION .text SECTION .text
%macro THRESHOLD_8 0 ;%1 depth (8 or 16) ; %2 b or w ; %3 constant
%macro THRESHOLD 3
%if ARCH_X86_64 %if ARCH_X86_64
cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
mov wd, dword wm mov wd, dword wm
mov hd, dword hm mov hd, dword hm
%else %else
cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
mov wd, r10m mov wd, r10m
%define ilinesizeq r5mp %define ilinesizeq r5mp
%define tlinesizeq r6mp %define tlinesizeq r6mp
...@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x ...@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
%define olinesizeq r9mp %define olinesizeq r9mp
%define hd r11mp %define hd r11mp
%endif %endif
VBROADCASTI128 m4, [pb_128] VBROADCASTI128 m4, [%3]
%if %1 == 16
add wq, wq ; w *= 2 (16 bits instead of 8)
%endif
add inq, wq add inq, wq
add thresholdq, wq add thresholdq, wq
add minq, wq add minq, wq
...@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x ...@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
movu m3, [maxq + xq] movu m3, [maxq + xq]
pxor m0, m4 pxor m0, m4
pxor m1, m4 pxor m1, m4
pcmpgtb m0, m1 pcmpgt%2 m0, m1
PBLENDVB m3, m2, m0 PBLENDVB m3, m2, m0
movu [outq + xq], m3 movu [outq + xq], m3
add xq, mmsize add xq, mmsize
...@@ -77,9 +82,11 @@ RET ...@@ -77,9 +82,11 @@ RET
%endmacro %endmacro
INIT_XMM sse4 INIT_XMM sse4
THRESHOLD_8 THRESHOLD 8, b, pb_128
THRESHOLD 16, w, pb_128_0
%if HAVE_AVX2_EXTERNAL %if HAVE_AVX2_EXTERNAL
INIT_YMM avx2 INIT_YMM avx2
THRESHOLD_8 THRESHOLD 8, b, pb_128
THRESHOLD 16, w, pb_128_0
%endif %endif
...@@ -23,21 +23,20 @@ ...@@ -23,21 +23,20 @@
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavfilter/threshold.h" #include "libavfilter/threshold.h"
void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold, #define THRESHOLD_FUNC(depth, opt) \
const uint8_t *min, const uint8_t *max, void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
uint8_t *out, const uint8_t *min, const uint8_t *max, \
ptrdiff_t ilinesize, ptrdiff_t tlinesize, uint8_t *out, \
ptrdiff_t flinesize, ptrdiff_t slinesize, ptrdiff_t ilinesize, ptrdiff_t tlinesize, \
ptrdiff_t olinesize, ptrdiff_t flinesize, ptrdiff_t slinesize, \
int w, int h); ptrdiff_t olinesize, \
void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold,
const uint8_t *min, const uint8_t *max,
uint8_t *out,
ptrdiff_t ilinesize, ptrdiff_t tlinesize,
ptrdiff_t flinesize, ptrdiff_t slinesize,
ptrdiff_t olinesize,
int w, int h); int w, int h);
THRESHOLD_FUNC(8, sse4)
THRESHOLD_FUNC(8, avx2)
THRESHOLD_FUNC(16, sse4)
THRESHOLD_FUNC(16, avx2)
av_cold void ff_threshold_init_x86(ThresholdContext *s) av_cold void ff_threshold_init_x86(ThresholdContext *s)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
...@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s) ...@@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
if (EXTERNAL_AVX2_FAST(cpu_flags)) { if (EXTERNAL_AVX2_FAST(cpu_flags)) {
s->threshold = ff_threshold8_avx2; s->threshold = ff_threshold8_avx2;
} }
} else if (s->depth == 16) {
if (EXTERNAL_SSE4(cpu_flags)) {
s->threshold = ff_threshold16_sse4;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
s->threshold = ff_threshold16_avx2;
}
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment