Commit bbcaf25d authored by James Almer, committed by Michael Niedermayer

lavu/sha512: Fully unroll the transform function loops

crypto_bench SHA-512 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.7.3 x86_64

Before:
lavu       SHA-512      size: 1048576  runs:   1024  time:   12.737 +- 0.147

After:
lavu       SHA-512      size: 1048576  runs:   1024  time:   11.670 +- 0.173
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 7e4fe516
@@ -150,27 +150,32 @@ static void sha512_transform(uint64_t *state, const uint8_t buffer[128])
a = T1 + T2; a = T1 + T2;
} }
#else #else
for (i = 0; i < 16 - 7;) {
ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
}
for (; i < 80 - 7;) { #define R512_0 \
ROUND512_16_TO_80(a, b, c, d, e, f, g, h); ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \
ROUND512_16_TO_80(h, a, b, c, d, e, f, g); ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \
ROUND512_16_TO_80(g, h, a, b, c, d, e, f); ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \
ROUND512_16_TO_80(f, g, h, a, b, c, d, e); ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \
ROUND512_16_TO_80(e, f, g, h, a, b, c, d); ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \
ROUND512_16_TO_80(d, e, f, g, h, a, b, c); ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \
ROUND512_16_TO_80(c, d, e, f, g, h, a, b); ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \
ROUND512_16_TO_80(b, c, d, e, f, g, h, a); ROUND512_0_TO_15(b, c, d, e, f, g, h, a)
}
i = 0;
R512_0; R512_0;
#define R512_16 \
ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \
ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \
ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \
ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \
ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \
ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \
ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \
ROUND512_16_TO_80(b, c, d, e, f, g, h, a)
R512_16; R512_16; R512_16; R512_16;
R512_16; R512_16; R512_16; R512_16;
#endif #endif
state[0] += a; state[0] += a;
state[1] += b; state[1] += b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment