avcodec/rdft: remove sintable

It is redundant with costable. The first half of sintable is identical to the second half of costable. The second half of sintable is the negation of the first half of sintable. The computation is changed to handle the sign of the sin values, in both the C code and the ARM assembly code. Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>

avcodec/rdft: remove sintable
It is redundant with costable. The first half of sintable is identical to the second half of costable. The second half of sintable is the negation of the first half of sintable. The computation is changed to handle the sign of the sin values, in both the C code and the ARM assembly code. Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
0780ad9c · Muhammad Faiz · e7d977b4 · 0780ad9c · 0780ad9c · 0780ad9c
Commit 0780ad9c authored Jul 07, 2017 by Muhammad Faiz
Hide whitespace changes
Inline Side-by-side

Showing with 36 additions and 74 deletions

Makefile libavcodec/Makefile +1 -2

rdft_neon.S libavcodec/arm/rdft_neon.S +9 -4

rdft.c libavcodec/rdft.c +24 -44

rdft.h libavcodec/rdft.h +2 -24

No files found.
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -122,8 +122,7 @@ OBJS-$(CONFIG_QSV)                     += qsv.o
 OBJS-$(CONFIG_QSVDEC)                  += qsvdec.o
 OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
-RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
-OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
+OBJS-$(CONFIG_RDFT)                    += rdft.o
 OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
 OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o

--- a/libavcodec/arm/rdft_neon.S
+++ b/libavcodec/arm/rdft_neon.S
@@ -30,18 +30,21 @@ function ff_rdft_calc_neon, export=1

        lsls            r6,  r6,  #31
        bne             1f
-        add             r0,  r4,  #20
+        add             r0,  r4,  #24
        bl              X(ff_fft_permute_neon)
-        add             r0,  r4,  #20
+        add             r0,  r4,  #24
        mov             r1,  r5
        bl              X(ff_fft_calc_neon)
 1:
        ldr             r12, [r4, #0]           @ nbits
        mov             r2,  #1
+        ldr             r8,  [r4, #20]          @ negative_sin
        lsl             r12, r2,  r12
        add             r0,  r5,  #8
+        lsl             r8,  r8,  #31
        add             r1,  r5,  r12, lsl #2
        lsr             r12, r12, #2
+        vdup.32         d26, r8
        ldr             r2,  [r4, #12]          @ tcos
        sub             r12, r12, #2
        ldr             r3,  [r4, #16]          @ tsin
@@ -55,6 +58,7 @@ function ff_rdft_calc_neon, export=1
        vld1.32         {d5},     [r3,:64]!     @ tsin[i]
        vmov.f32        d18, #0.5               @ k1
        vdup.32         d19, r6
+        veor            d5,  d26, d5
        pld             [r0, #32]
        veor            d19, d18, d19           @ k2
        vmov.i32        d16, #0
@@ -90,6 +94,7 @@ function ff_rdft_calc_neon, export=1
        vld1.32         {d5},     [r3,:64]!     @  tsin[i]
        veor            d24, d22, d17           @  ev.re,-ev.im
        vrev64.32       d3,  d23                @  od.re, od.im
+        veor            d5, d26, d5
        pld             [r2, #32]
        veor            d2,  d3,  d16           @ -od.re, od.im
        pld             [r3, #32]
@@ -140,10 +145,10 @@ function ff_rdft_calc_neon, export=1

        vmul.f32        d22, d22, d18
        vst1.32         {d22},    [r5,:64]
-        add             r0,  r4,  #20
+        add             r0,  r4,  #24
        mov             r1,  r5
        bl              X(ff_fft_permute_neon)
-        add             r0,  r4,  #20
+        add             r0,  r4,  #24
        mov             r1,  r5
        pop             {r4-r8,lr}
        b               X(ff_fft_calc_neon)

--- a/libavcodec/rdft.c
+++ b/libavcodec/rdft.c
@@ -28,28 +28,6 @@
 * (Inverse) Real Discrete Fourier Transforms.
 */

-/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
-#if !CONFIG_HARDCODED_TABLES
-SINTABLE(16);
-SINTABLE(32);
-SINTABLE(64);
-SINTABLE(128);
-SINTABLE(256);
-SINTABLE(512);
-SINTABLE(1024);
-SINTABLE(2048);
-SINTABLE(4096);
-SINTABLE(8192);
-SINTABLE(16384);
-SINTABLE(32768);
-SINTABLE(65536);
-#endif
-static SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
-    NULL, NULL, NULL, NULL,
-    ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
-    ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
-};
-
 /** Map one real FFT into two parallel real even and odd FFTs. Then interleave
 * the two real FFTs into one complex FFT. Unmangle the results.
 * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
@@ -73,20 +51,29 @@ static void rdft_calc_c(RDFTContext *s, FFTSample *data)
    ev.re = data[0];
    data[0] = ev.re+data[1];
    data[1] = ev.re-data[1];
-    for (i = 1; i < (n>>2); i++) {
-        i1 = 2*i;
-        i2 = n-i1;
-        /* Separate even and odd FFTs */
-        ev.re =  k1*(data[i1  ]+data[i2  ]);
-        od.im = -k2*(data[i1  ]-data[i2  ]);
-        ev.im =  k1*(data[i1+1]-data[i2+1]);
-        od.re =  k2*(data[i1+1]+data[i2+1]);
-        /* Apply twiddle factors to the odd FFT and add to the even FFT */
-        data[i1  ] =  ev.re + od.re*tcos[i] - od.im*tsin[i];
-        data[i1+1] =  ev.im + od.im*tcos[i] + od.re*tsin[i];
-        data[i2  ] =  ev.re - od.re*tcos[i] + od.im*tsin[i];
-        data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
+
+#define RDFT_UNMANGLE(sign0, sign1)                                         \
+    for (i = 1; i < (n>>2); i++) {                                          \
+        i1 = 2*i;                                                           \
+        i2 = n-i1;                                                          \
+        /* Separate even and odd FFTs */                                    \
+        ev.re =  k1*(data[i1  ]+data[i2  ]);                                \
+        od.im = -k2*(data[i1  ]-data[i2  ]);                                \
+        ev.im =  k1*(data[i1+1]-data[i2+1]);                                \
+        od.re =  k2*(data[i1+1]+data[i2+1]);                                \
+        /* Apply twiddle factors to the odd FFT and add to the even FFT */  \
+        data[i1  ] =  ev.re + od.re*tcos[i] sign0 od.im*tsin[i];            \
+        data[i1+1] =  ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
+        data[i2  ] =  ev.re - od.re*tcos[i] sign1 od.im*tsin[i];            \
+        data[i2+1] = -ev.im + od.im*tcos[i] sign1 od.re*tsin[i];            \
+    }
+
+    if (s->negative_sin) {
+        RDFT_UNMANGLE(+,-)
+    } else {
+        RDFT_UNMANGLE(-,+)
    }
+
    data[2*i+1]=s->sign_convention*data[2*i+1];
    if (s->inverse) {
        data[0] *= k1;
@@ -104,6 +91,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
    s->nbits           = nbits;
    s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
    s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
+    s->negative_sin    = trans == DFT_C2R || trans == DFT_R2C;

    if (nbits < 4 || nbits > 16)
        return AVERROR(EINVAL);
@@ -113,15 +101,7 @@ av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)

    ff_init_ff_cos_tabs(nbits);
    s->tcos = ff_cos_tabs[nbits];
-    s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
-#if !CONFIG_HARDCODED_TABLES
-    {
-        int i;
-        const double theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1) * 2 * M_PI / n;
-        for (i = 0; i < (n >> 2); i++)
-            s->tsin[i] = sin(i * theta);
-    }
-#endif
+    s->tsin = ff_cos_tabs[nbits] + (n >> 2);
    s->rdft_calc   = rdft_calc_c;

    if (ARCH_ARM) ff_rdft_init_arm(s);

--- a/libavcodec/rdft.h
+++ b/libavcodec/rdft.h
@@ -25,29 +25,6 @@
 #include "config.h"
 #include "fft.h"

-#if CONFIG_HARDCODED_TABLES
-#   define SINTABLE_CONST const
-#else
-#   define SINTABLE_CONST
-#endif
-
-#define SINTABLE(size) \
-    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
-
-extern SINTABLE(16);
-extern SINTABLE(32);
-extern SINTABLE(64);
-extern SINTABLE(128);
-extern SINTABLE(256);
-extern SINTABLE(512);
-extern SINTABLE(1024);
-extern SINTABLE(2048);
-extern SINTABLE(4096);
-extern SINTABLE(8192);
-extern SINTABLE(16384);
-extern SINTABLE(32768);
-extern SINTABLE(65536);
-
 struct RDFTContext {
    int nbits;
    int inverse;
@@ -55,7 +32,8 @@ struct RDFTContext {

    /* pre/post rotation tables */
    const FFTSample *tcos;
-    SINTABLE_CONST FFTSample *tsin;
+    const FFTSample *tsin;
+    int negative_sin;
    FFTContext fft;
    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
 };