Merge commit 'd3f5b947'

* commit 'd3f5b947':
  aarch64: opus NEON iMDCT and FFT

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Merge commit 'd3f5b947'
* commit 'd3f5b947':
  aarch64: opus NEON iMDCT and FFT

Merged-by: Michael Niedermayer <michaelni@gmx.at>
30cdf384 · Michael Niedermayer · 91d7d790 · d3f5b947 · 30cdf384 · 30cdf384
Commit 30cdf384 authored May 15, 2014 by Michael Niedermayer
8 changed files
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -7,6 +7,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
 OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o

+OBJS-$(CONFIG_OPUS_DECODER)             += aarch64/opus_imdct_init.o
 OBJS-$(CONFIG_RV40_DECODER)             += aarch64/rv40dsp_init_aarch64.o
 OBJS-$(CONFIG_VC1_DECODER)              += aarch64/vc1dsp_init_aarch64.o
 OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
@@ -23,4 +24,5 @@ NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
 NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
 NEON-OBJS-$(CONFIG_MDCT)                += aarch64/mdct_neon.o

+NEON-OBJS-$(CONFIG_OPUS_DECODER)        += aarch64/opus_imdct_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)      += aarch64/vorbisdsp_neon.o
--- a/libavcodec/aarch64/asm-offsets.h
+++ b/libavcodec/aarch64/asm-offsets.h
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_ASM_OFFSETS_H
+#define AVCODEC_AARCH64_ASM_OFFSETS_H
+
+/* CeltIMDCTContext
+ *
+ * Hard-coded byte offsets of CeltIMDCTContext members. Every value below is
+ * checked against the C struct with AV_CHECK_OFFSET in opus_imdct_init.c, so
+ * any change to the struct layout has to be mirrored here.
+ * The pointer members are spaced 8 bytes apart, i.e. 8-byte pointers are
+ * assumed.
+ * NOTE(review): presumably consumed by opus_imdct_neon.S - confirm.
+ */
+#define CELT_EXPTAB                     0x20
+#define CELT_FFT_N                      0x00
+#define CELT_LEN2                       0x04
+#define CELT_LEN4                       (CELT_LEN2 + 0x4)   // loaded as pair with CELT_LEN2
+#define CELT_TMP                        0x10
+#define CELT_TWIDDLE                    (CELT_TMP + 0x8)    // loaded as pair with CELT_TMP
+
+#endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */
--- a/libavcodec/aarch64/opus_imdct_init.c
+++ b/libavcodec/aarch64/opus_imdct_init.c
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stddef.h>
+
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/internal.h"
+#include "libavcodec/opus_imdct.h"
+
+#include "asm-offsets.h"
+
+AV_CHECK_OFFSET(CeltIMDCTContext, exptab,         CELT_EXPTAB); // ties each asm-offsets.h constant to its struct member
+AV_CHECK_OFFSET(CeltIMDCTContext, fft_n,          CELT_FFT_N);
+AV_CHECK_OFFSET(CeltIMDCTContext, len2,           CELT_LEN2);
+AV_CHECK_OFFSET(CeltIMDCTContext, len4,           CELT_LEN4); // defined as CELT_LEN2 + 0x4
+AV_CHECK_OFFSET(CeltIMDCTContext, tmp,            CELT_TMP);
+AV_CHECK_OFFSET(CeltIMDCTContext, twiddle_exptab, CELT_TWIDDLE); // defined as CELT_TMP + 0x8; NOTE(review): AV_CHECK_OFFSET is presumably a build-time assertion from libavutil/internal.h - confirm
+
+void ff_celt_imdct_half_neon(CeltIMDCTContext *s, float *dst, const float *src,
+                             ptrdiff_t stride, float scale); // same signature as CeltIMDCTContext.imdct_half; defined outside this file (NOTE(review): presumably opus_imdct_neon.S)
+
+void ff_celt_imdct_init_aarch64(CeltIMDCTContext *s) // aarch64 hook invoked by ff_celt_imdct_init() after it installs the C imdct_half
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) { // s->imdct_half is left untouched unless have_neon() accepts the CPU flags
+        s->imdct_half = ff_celt_imdct_half_neon;
+    }
+}
--- a/libavcodec/aarch64/opus_imdct_neon.S
+++ b/libavcodec/aarch64/opus_imdct_neon.S
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -92,8 +92,6 @@ typedef struct OpusRangeCoder {

 typedef struct SilkContext SilkContext;

-typedef struct CeltIMDCTContext CeltIMDCTContext;
-
 typedef struct CeltContext CeltContext;

 typedef struct OpusPacket {
@@ -398,22 +396,6 @@ int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc,
                              enum OpusBandwidth bandwidth, int coded_channels,
                              int duration_ms);

-/**
- * Init an iMDCT of the length 2 * 15 * (2^N)
- */
-int ff_celt_imdct_init(CeltIMDCTContext **s, int N);
-
-/**
- * Free an iMDCT.
- */
-void ff_celt_imdct_uninit(CeltIMDCTContext **s);
-
-/**
- * Calculate the middle half of the iMDCT
- */
-void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
-                        int src_stride, float scale);
-
 int ff_celt_init(AVCodecContext *avctx, CeltContext **s, int output_channels);

 void ff_celt_free(CeltContext **s);

--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -29,6 +29,7 @@
 #include "libavutil/float_dsp.h"

 #include "opus.h"
+#include "opus_imdct.h"

 enum CeltSpread {
    CELT_SPREAD_NONE,
@@ -2095,7 +2096,7 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
        for (j = 0; j < s->blocks; j++) {
            float *dst  = frame->buf + 1024 + j * s->blocksize;

-            ff_celt_imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
+            imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
                              s->blocks, imdct_scale);
            s->dsp.vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
                                      celt_window, CELT_OVERLAP / 2);

--- a/libavcodec/opus_imdct.c
+++ b/libavcodec/opus_imdct.c
@@ -25,12 +25,19 @@

 #include <float.h>
 #include <math.h>
+#include <stddef.h>
+
+#include "config.h"

 #include "libavutil/attributes.h"
 #include "libavutil/common.h"

-#include "fft.h"
+#include "avfft.h"
 #include "opus.h"
+#include "opus_imdct.h"
+
+// minimal iMDCT size to make SIMD opts easier
+#define CELT_MIN_IMDCT_SIZE 120

 // complex c = a * b
 #define CMUL3(cre, cim, are, aim, bre, bim)          \
@@ -59,18 +66,6 @@ do {                                                 \
    (d).im = -ri + ir;                               \
 } while (0)

-struct CeltIMDCTContext {
-    int fft_n;
-    int len2;
-    int len4;
-
-    FFTComplex *tmp;
-
-    FFTComplex *twiddle_exptab;
-
-    FFTComplex *exptab[6];
-};
-
 av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps)
 {
    CeltIMDCTContext *s = *ps;
@@ -89,6 +84,9 @@ av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps)
    av_freep(ps);
 }

+static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
+                            ptrdiff_t stride, float scale);
+
 av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
 {
    CeltIMDCTContext *s;
@@ -96,7 +94,7 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
    int len  = 2 * len2;
    int i, j;

-    if (len2 > CELT_MAX_FRAME_SIZE)
+    if (len2 > CELT_MAX_FRAME_SIZE || len2 < CELT_MIN_IMDCT_SIZE)
        return AVERROR(EINVAL);

    s = av_mallocz(sizeof(*s));
@@ -136,6 +134,11 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N)
    for (j = 15; j < 19; j++)
        s->exptab[0][j] = s->exptab[0][j - 15];

+    s->imdct_half = celt_imdct_half;
+
+    if (ARCH_AARCH64)
+        ff_celt_imdct_init_aarch64(s);
+
    *ps = s;

    return 0;
@@ -144,7 +147,7 @@ fail:
    return AVERROR(ENOMEM);
 }

-static void fft5(FFTComplex *out, const FFTComplex *in, int stride)
+static void fft5(FFTComplex *out, const FFTComplex *in, ptrdiff_t stride)
 {
    // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5)
    static const FFTComplex fact[] = { { 0.30901699437494745,  0.95105651629515353 },
@@ -177,7 +180,7 @@ static void fft5(FFTComplex *out, const FFTComplex *in, int stride)
    out[4].im = in[0].im + z[0][3].im + z[1][2].im + z[2][1].im + z[3][0].im;
 }

-static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int stride)
+static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, ptrdiff_t stride)
 {
    const FFTComplex *exptab = s->exptab[0];
    FFTComplex tmp[5];
@@ -212,7 +215,8 @@ static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, in
 /*
 * FFT of the length 15 * (2^N)
 */
-static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int N, int stride)
+static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
+                     int N, ptrdiff_t stride)
 {
    if (N) {
        const FFTComplex *exptab = s->exptab[N];
@@ -237,8 +241,8 @@ static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in,
        fft15(s, out, in, stride);
 }

-void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
-                        int stride, float scale)
+static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src,
+                            ptrdiff_t stride, float scale)
 {
    FFTComplex *z = (FFTComplex *)dst;
    const int len8 = s->len4 / 2;

--- a/libavcodec/opus_imdct.h
+++ b/libavcodec/opus_imdct.h
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPUS_IMDCT_H
+#define AVCODEC_OPUS_IMDCT_H
+
+#include <stddef.h>
+
+#include "avfft.h"
+
+typedef struct CeltIMDCTContext {
+    int fft_n;                  // offset mirrored as CELT_FFT_N in aarch64/asm-offsets.h
+    int len2;                   // offset mirrored as CELT_LEN2
+    int len4;                   // must stay directly after len2: CELT_LEN4 is defined as CELT_LEN2 + 4 ("loaded as pair")
+
+    FFTComplex *tmp;            // offset mirrored as CELT_TMP
+
+    FFTComplex *twiddle_exptab; // must stay directly after tmp: CELT_TWIDDLE is defined as CELT_TMP + 8 ("loaded as pair")
+
+    FFTComplex *exptab[6];      // exptab[0] is read by fft15(), exptab[N] by fft_calc() for N != 0; offset mirrored as CELT_EXPTAB
+
+    /**
+     * Calculate the middle half of the iMDCT
+     *
+     * ff_celt_imdct_init() points this at the C implementation; on aarch64
+     * builds ff_celt_imdct_init_aarch64() may then replace it with the NEON
+     * version. Callers invoke the transform through this pointer.
+     */
+    void (*imdct_half)(struct CeltIMDCTContext *s, float *dst, const float *src,
+                       ptrdiff_t src_stride, float scale);
+} CeltIMDCTContext;
+
+/**
+ * Init an iMDCT of the length 2 * 15 * (2^N)
+ *
+ * On success *s is set to the newly allocated context.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the resulting transform size is
+ *         outside the supported range, AVERROR(ENOMEM) from the
+ *         initialisation failure path
+ */
+int ff_celt_imdct_init(CeltIMDCTContext **s, int N);
+
+/**
+ * Free an iMDCT.
+ */
+void ff_celt_imdct_uninit(CeltIMDCTContext **s);
+
+
+void ff_celt_imdct_init_aarch64(CeltIMDCTContext *s); // arch hook run by ff_celt_imdct_init() when ARCH_AARCH64 is set; may override s->imdct_half
+
+#endif /* AVCODEC_OPUS_IMDCT_H */