Commit 56cea329 authored by Clément Bœsch

lavfi/lut3d: faster tetrahedral interpolation.

Increase performance by fetching only the necessary points.

1097 → 917 decicycles.
parent b6ee25e3
...@@ -144,38 +144,44 @@ static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d, ...@@ -144,38 +144,44 @@ static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d,
const int next[] = {NEXT(s->r), NEXT(s->g), NEXT(s->b)}; const int next[] = {NEXT(s->r), NEXT(s->g), NEXT(s->b)};
const struct rgbvec d = {s->r - prev[0], s->g - prev[1], s->b - prev[2]}; const struct rgbvec d = {s->r - prev[0], s->g - prev[1], s->b - prev[2]};
const struct rgbvec c000 = lut3d->lut[prev[0]][prev[1]][prev[2]]; const struct rgbvec c000 = lut3d->lut[prev[0]][prev[1]][prev[2]];
const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]];
const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]];
const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]];
const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]];
const struct rgbvec c101 = lut3d->lut[next[0]][prev[1]][next[2]];
const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]];
const struct rgbvec c111 = lut3d->lut[next[0]][next[1]][next[2]]; const struct rgbvec c111 = lut3d->lut[next[0]][next[1]][next[2]];
struct rgbvec c; struct rgbvec c;
if (d.r > d.g) { if (d.r > d.g) {
if (d.g > d.b) { if (d.g > d.b) {
const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]];
const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]];
c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r; c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r;
c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g; c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g;
c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b; c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b;
} else if (d.r > d.b) { } else if (d.r > d.b) {
const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]];
const struct rgbvec c101 = lut3d->lut[next[0]][prev[1]][next[2]];
c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r; c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r;
c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g; c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g;
c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b; c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b;
} else { } else {
const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]];
const struct rgbvec c101 = lut3d->lut[next[0]][prev[1]][next[2]];
c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r; c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r;
c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g; c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g;
c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b; c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b;
} }
} else { } else {
if (d.b > d.g) { if (d.b > d.g) {
const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]];
const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]];
c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r; c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r;
c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g; c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g;
c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b; c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b;
} else if (d.b > d.r) { } else if (d.b > d.r) {
const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]];
const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]];
c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r; c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r;
c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g; c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g;
c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b; c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b;
} else { } else {
const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]];
const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]];
c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r; c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r;
c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g; c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g;
c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b; c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment