Commit a678d667 authored by Timothy Gu

vf_blend: Use integers for divide mode

2.5x faster for 8-bit mode without autovectorization in GCC, 2x
slower with it on x86. However, since the platforms on which we
enable GCC autovectorization most probably have support for the SSE2
optimization (added in the subsequent commit), this commit should
in general be beneficial.
parent 4b750104
......@@ -247,7 +247,7 @@ DEFINE_BLEND8(hardlight, (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
DEFINE_BLEND8(hardmix, (A < (255 - B)) ? 0: 255)
DEFINE_BLEND8(darken, FFMIN(A, B))
DEFINE_BLEND8(lighten, FFMAX(A, B))
DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255)))
DEFINE_BLEND8(divide, av_clip_uint8(B == 0 ? 255 : 255 * A / B))
DEFINE_BLEND8(dodge, DODGE(A, B))
DEFINE_BLEND8(burn, BURN(A, B))
DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
......@@ -287,7 +287,7 @@ DEFINE_BLEND16(hardlight, (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
DEFINE_BLEND16(hardmix, (A < (65535 - B)) ? 0: 65535)
DEFINE_BLEND16(darken, FFMIN(A, B))
DEFINE_BLEND16(lighten, FFMAX(A, B))
DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535)))
DEFINE_BLEND16(divide, av_clip_uint16(B == 0 ? 65535 : 65535 * A / B))
DEFINE_BLEND16(dodge, DODGE(A, B))
DEFINE_BLEND16(burn, BURN(A, B))
DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment