Commit d2119f62 authored by Rostislav Pehlivanov

imdct15: rename to mdct15 and add a forward transform

Handles strides (needed for Opus transients), does pre-reindexing and folding
without needing a copy.
Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
parent 373ee2c6
...@@ -2107,7 +2107,7 @@ CONFIG_EXTRA=" ...@@ -2107,7 +2107,7 @@ CONFIG_EXTRA="
huffyuvencdsp huffyuvencdsp
idctdsp idctdsp
iirfilter iirfilter
imdct15 mdct15
intrax8 intrax8
iso_media iso_media
ividsp ividsp
...@@ -2349,7 +2349,7 @@ vc1dsp_select="h264chroma qpeldsp startcode" ...@@ -2349,7 +2349,7 @@ vc1dsp_select="h264chroma qpeldsp startcode"
rdft_select="fft" rdft_select="fft"
# decoders / encoders # decoders / encoders
aac_decoder_select="imdct15 mdct sinewin" aac_decoder_select="mdct15 mdct sinewin"
aac_fixed_decoder_select="mdct sinewin" aac_fixed_decoder_select="mdct sinewin"
aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin" aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin"
aac_latm_decoder_select="aac_decoder aac_latm_parser" aac_latm_decoder_select="aac_decoder aac_latm_parser"
...@@ -2491,7 +2491,7 @@ nellymoser_encoder_select="audio_frame_queue mdct sinewin" ...@@ -2491,7 +2491,7 @@ nellymoser_encoder_select="audio_frame_queue mdct sinewin"
nuv_decoder_select="idctdsp lzo" nuv_decoder_select="idctdsp lzo"
on2avc_decoder_select="mdct" on2avc_decoder_select="mdct"
opus_decoder_deps="swresample" opus_decoder_deps="swresample"
opus_decoder_select="imdct15" opus_decoder_select="mdct15"
png_decoder_select="zlib" png_decoder_select="zlib"
png_encoder_select="llvidencdsp zlib" png_encoder_select="llvidencdsp zlib"
prores_decoder_select="blockdsp idctdsp" prores_decoder_select="blockdsp idctdsp"
......
...@@ -84,7 +84,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o ...@@ -84,7 +84,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o
OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o
OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o
OBJS-$(CONFIG_IIRFILTER) += iirfilter.o OBJS-$(CONFIG_IIRFILTER) += iirfilter.o
OBJS-$(CONFIG_IMDCT15) += imdct15.o OBJS-$(CONFIG_MDCT15) += mdct15.o
OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o
OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o
OBJS-$(CONFIG_JNI) += ffjni.o jni.o OBJS-$(CONFIG_JNI) += ffjni.o jni.o
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#include "libavutil/fixed_dsp.h" #include "libavutil/fixed_dsp.h"
#include "avcodec.h" #include "avcodec.h"
#if !USE_FIXED #if !USE_FIXED
#include "imdct15.h" #include "mdct15.h"
#endif #endif
#include "fft.h" #include "fft.h"
#include "mpeg4audio.h" #include "mpeg4audio.h"
...@@ -327,7 +327,7 @@ struct AACContext { ...@@ -327,7 +327,7 @@ struct AACContext {
#if USE_FIXED #if USE_FIXED
AVFixedDSPContext *fdsp; AVFixedDSPContext *fdsp;
#else #else
IMDCT15Context *mdct480; MDCT15Context *mdct480;
AVFloatDSPContext *fdsp; AVFloatDSPContext *fdsp;
#endif /* USE_FIXED */ #endif /* USE_FIXED */
int random_state; int random_state;
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
#include "internal.h" #include "internal.h"
#include "get_bits.h" #include "get_bits.h"
#include "fft.h" #include "fft.h"
#include "imdct15.h" #include "mdct15.h"
#include "lpc.h" #include "lpc.h"
#include "kbdwin.h" #include "kbdwin.h"
#include "sinewin.h" #include "sinewin.h"
......
...@@ -1207,7 +1207,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ...@@ -1207,7 +1207,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0)); AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0));
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0));
#if !USE_FIXED #if !USE_FIXED
ret = ff_imdct15_init(&ac->mdct480, 5); ret = ff_mdct15_init(&ac->mdct480, 1, 5, -1.0f);
if (ret < 0) if (ret < 0)
return ret; return ret;
#endif #endif
...@@ -3217,7 +3217,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) ...@@ -3217,7 +3217,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
ff_mdct_end(&ac->mdct_ld); ff_mdct_end(&ac->mdct_ld);
ff_mdct_end(&ac->mdct_ltp); ff_mdct_end(&ac->mdct_ltp);
#if !USE_FIXED #if !USE_FIXED
ff_imdct15_uninit(&ac->mdct480); ff_mdct15_uninit(&ac->mdct480);
#endif #endif
av_freep(&ac->fdsp); av_freep(&ac->fdsp);
return 0; return 0;
......
...@@ -33,7 +33,8 @@ ...@@ -33,7 +33,8 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/common.h" #include "libavutil/common.h"
#include "imdct15.h" #include "avfft.h"
#include "mdct15.h"
// complex c = a * b // complex c = a * b
#define CMUL3(cre, cim, are, aim, bre, bim) \ #define CMUL3(cre, cim, are, aim, bre, bim) \
...@@ -44,9 +45,9 @@ do { \ ...@@ -44,9 +45,9 @@ do { \
#define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) #define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
av_cold void ff_imdct15_uninit(IMDCT15Context **ps) av_cold void ff_mdct15_uninit(MDCT15Context **ps)
{ {
IMDCT15Context *s = *ps; MDCT15Context *s = *ps;
if (!s) if (!s)
return; return;
...@@ -61,10 +62,12 @@ av_cold void ff_imdct15_uninit(IMDCT15Context **ps) ...@@ -61,10 +62,12 @@ av_cold void ff_imdct15_uninit(IMDCT15Context **ps)
av_freep(ps); av_freep(ps);
} }
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride);
static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
ptrdiff_t stride, float scale); ptrdiff_t stride, float scale);
static inline int init_pfa_reindex_tabs(IMDCT15Context *s) static inline int init_pfa_reindex_tabs(MDCT15Context *s)
{ {
int i, j; int i, j;
const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */
...@@ -85,7 +88,7 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) ...@@ -85,7 +88,7 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s)
for (j = 0; j < 15; j++) { for (j = 0; j < 15; j++) {
const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo;
const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo;
const int k_pre = 15*i + (j - q_pre*15)*l_ptwo; const int k_pre = 15*i + ((j - q_pre*15) << b_ptwo);
const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo;
s->pfa_prereindex[i*15 + j] = k_pre; s->pfa_prereindex[i*15 + j] = k_pre;
s->pfa_postreindex[k_post] = l_ptwo*j + i; s->pfa_postreindex[k_post] = l_ptwo*j + i;
...@@ -95,9 +98,10 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) ...@@ -95,9 +98,10 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s)
return 0; return 0;
} }
av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale)
{ {
IMDCT15Context *s; MDCT15Context *s;
double alpha, theta;
int len2 = 15 * (1 << N); int len2 = 15 * (1 << N);
int len = 2 * len2; int len = 2 * len2;
int i; int i;
...@@ -113,9 +117,11 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) ...@@ -113,9 +117,11 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
s->fft_n = N - 1; s->fft_n = N - 1;
s->len4 = len2 / 2; s->len4 = len2 / 2;
s->len2 = len2; s->len2 = len2;
s->inverse = inverse;
s->mdct = mdct15;
s->imdct_half = imdct15_half; s->imdct_half = imdct15_half;
if (ff_fft_init(&s->ptwo_fft, N - 1, 1) < 0) if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0)
goto fail; goto fail;
if (init_pfa_reindex_tabs(s)) if (init_pfa_reindex_tabs(s))
...@@ -129,15 +135,20 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) ...@@ -129,15 +135,20 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
if (!s->twiddle_exptab) if (!s->twiddle_exptab)
goto fail; goto fail;
theta = 0.125f + (scale < 0 ? s->len4 : 0);
scale = sqrt(fabs(scale));
for (i = 0; i < s->len4; i++) { for (i = 0; i < s->len4; i++) {
s->twiddle_exptab[i].re = cos(2 * M_PI * (i + 0.125f + s->len4) / len); alpha = 2 * M_PI * (i + theta) / len;
s->twiddle_exptab[i].im = sin(2 * M_PI * (i + 0.125f + s->len4) / len); s->twiddle_exptab[i].re = cos(alpha) * scale;
s->twiddle_exptab[i].im = sin(alpha) * scale;
} }
/* 15-point FFT exptab */ /* 15-point FFT exptab */
for (i = 0; i < 19; i++) { for (i = 0; i < 19; i++) {
if (i < 15) { if (i < 15) {
double theta = (2.0f * M_PI * i) / 15.0f; double theta = (2.0f * M_PI * i) / 15.0f;
if (!s->inverse)
theta *= -1;
s->exptab[i].re = cos(theta); s->exptab[i].re = cos(theta);
s->exptab[i].im = sin(theta); s->exptab[i].im = sin(theta);
} else { /* Wrap around to simplify fft15 */ } else { /* Wrap around to simplify fft15 */
...@@ -152,15 +163,17 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) ...@@ -152,15 +163,17 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N)
s->exptab[20].im = sin(1.0f * M_PI / 5.0f); s->exptab[20].im = sin(1.0f * M_PI / 5.0f);
/* Invert the phase for an inverse transform, do nothing for a forward transform */ /* Invert the phase for an inverse transform, do nothing for a forward transform */
s->exptab[19].im *= -1; if (s->inverse) {
s->exptab[20].im *= -1; s->exptab[19].im *= -1;
s->exptab[20].im *= -1;
}
*ps = s; *ps = s;
return 0; return 0;
fail: fail:
ff_imdct15_uninit(&s); ff_mdct15_uninit(&s);
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
...@@ -211,8 +224,7 @@ static inline void fft5(const FFTComplex exptab[2], FFTComplex *out, ...@@ -211,8 +224,7 @@ static inline void fft5(const FFTComplex exptab[2], FFTComplex *out,
out[4].im = in[0].im + z0[3].im; out[4].im = in[0].im + z0[3].im;
} }
static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, static void fft15(const FFTComplex exptab[22], FFTComplex *out, const FFTComplex *in, size_t stride)
const FFTComplex *in, size_t stride)
{ {
int k; int k;
FFTComplex tmp1[5], tmp2[5], tmp3[5]; FFTComplex tmp1[5], tmp2[5], tmp3[5];
...@@ -241,7 +253,51 @@ static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, ...@@ -241,7 +253,51 @@ static inline void fft15(const FFTComplex exptab[22], FFTComplex *out,
} }
} }
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride)
{
int i, j;
const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1;
const int l_ptwo = 1 << s->ptwo_fft.nbits;
FFTComplex fft15in[15];
/* Folding and pre-reindexing */
for (i = 0; i < l_ptwo; i++) {
for (j = 0; j < 15; j++) {
float re, im;
const int k = s->pfa_prereindex[i*15 + j];
if (k < len8) {
re = -src[2*k+len3] - src[len3-1-2*k];
im = -src[len4+2*k] + src[len4-1-2*k];
} else {
re = src[2*k-len4] - src[1*len3-1-2*k];
im = -src[2*k+len4] - src[5*len4-1-2*k];
}
CMUL3(fft15in[j].re, fft15in[j].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im);
}
fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo);
}
/* Then a 15xN FFT (where N is a power of two) */
for (i = 0; i < 15; i++)
s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i);
/* Reindex again, apply twiddles and output */
for (i = 0; i < len8; i++) {
float re0, im0, re1, im1;
const int i0 = len8 + i, i1 = len8 - i - 1;
const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1];
CMUL3(im1, re0, s->tmp[s1].re, s->tmp[s1].im, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re);
CMUL3(im0, re1, s->tmp[s0].re, s->tmp[s0].im, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re);
dst[2*i1*stride ] = re0;
dst[2*i1*stride + stride] = im0;
dst[2*i0*stride ] = re1;
dst[2*i0*stride + stride] = im1;
}
}
static void imdct15_half(MDCT15Context *s, float *dst, const float *src,
ptrdiff_t stride, float scale) ptrdiff_t stride, float scale)
{ {
FFTComplex fft15in[15]; FFTComplex fft15in[15];
......
/* /*
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
*
* This file is part of FFmpeg. * This file is part of FFmpeg.
* *
* FFmpeg is free software; you can redistribute it and/or * FFmpeg is free software; you can redistribute it and/or
...@@ -16,17 +18,18 @@ ...@@ -16,17 +18,18 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef AVCODEC_IMDCT15_H #ifndef AVCODEC_MDCT15_H
#define AVCODEC_IMDCT15_H #define AVCODEC_MDCT15_H
#include <stddef.h> #include <stddef.h>
#include "fft.h" #include "fft.h"
typedef struct IMDCT15Context { typedef struct MDCT15Context {
int fft_n; int fft_n;
int len2; int len2;
int len4; int len4;
int inverse;
int *pfa_prereindex; int *pfa_prereindex;
int *pfa_postreindex; int *pfa_postreindex;
...@@ -39,21 +42,26 @@ typedef struct IMDCT15Context { ...@@ -39,21 +42,26 @@ typedef struct IMDCT15Context {
/* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */ /* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */
FFTComplex exptab[21]; FFTComplex exptab[21];
/**
* Calculate a full 2N -> N MDCT
*/
void (*mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride);
/** /**
* Calculate the middle half of the iMDCT * Calculate the middle half of the iMDCT
*/ */
void (*imdct_half)(struct IMDCT15Context *s, float *dst, const float *src, void (*imdct_half)(struct MDCT15Context *s, float *dst, const float *src,
ptrdiff_t src_stride, float scale); ptrdiff_t src_stride, float scale);
} IMDCT15Context; } MDCT15Context;
/** /**
* Init an iMDCT of the length 2 * 15 * (2^N) * Init an (i)MDCT of the length 2 * 15 * (2^N)
*/ */
int ff_imdct15_init(IMDCT15Context **s, int N); int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale);
/** /**
* Free an iMDCT. * Frees a context
*/ */
void ff_imdct15_uninit(IMDCT15Context **s); void ff_mdct15_uninit(MDCT15Context **ps);
#endif /* AVCODEC_IMDCT15_H */ #endif /* AVCODEC_MDCT15_H */
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include "libavutil/float_dsp.h" #include "libavutil/float_dsp.h"
#include "libavutil/libm.h" #include "libavutil/libm.h"
#include "imdct15.h" #include "mdct15.h"
#include "opus.h" #include "opus.h"
#include "opustab.h" #include "opustab.h"
...@@ -63,7 +63,7 @@ typedef struct CeltFrame { ...@@ -63,7 +63,7 @@ typedef struct CeltFrame {
struct CeltContext { struct CeltContext {
// constant values that do not change during context lifetime // constant values that do not change during context lifetime
AVCodecContext *avctx; AVCodecContext *avctx;
IMDCT15Context *imdct[4]; MDCT15Context *imdct[4];
AVFloatDSPContext *dsp; AVFloatDSPContext *dsp;
int output_channels; int output_channels;
...@@ -1596,7 +1596,7 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, ...@@ -1596,7 +1596,7 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
int silence = 0; int silence = 0;
int transient = 0; int transient = 0;
int anticollapse = 0; int anticollapse = 0;
IMDCT15Context *imdct; MDCT15Context *imdct;
float imdct_scale = 1.0; float imdct_scale = 1.0;
if (coded_channels != 1 && coded_channels != 2) { if (coded_channels != 1 && coded_channels != 2) {
...@@ -1792,7 +1792,7 @@ void ff_celt_free(CeltContext **ps) ...@@ -1792,7 +1792,7 @@ void ff_celt_free(CeltContext **ps)
return; return;
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++)
ff_imdct15_uninit(&s->imdct[i]); ff_mdct15_uninit(&s->imdct[i]);
av_freep(&s->dsp); av_freep(&s->dsp);
av_freep(ps); av_freep(ps);
...@@ -1817,7 +1817,7 @@ int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels) ...@@ -1817,7 +1817,7 @@ int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels)
s->output_channels = output_channels; s->output_channels = output_channels;
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) { for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) {
ret = ff_imdct15_init(&s->imdct[i], i + 3); ret = ff_mdct15_init(&s->imdct[i], 1, i + 3, -1.0f);
if (ret < 0) if (ret < 0)
goto fail; goto fail;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment