Commit ed492b61 authored by Alex Converse's avatar Alex Converse

Add an HE-AAC v1 decoder.

A large portion of this code was originally authored by Robert Swain. The rest
was written by me. Full history is available at:
svn://svn.ffmpeg.org/soc/aac-sbr
http://github.com/aconverse/ffmpeg-heaac/tree/sbr_pub

Originally committed as revision 22316 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent f19341e1
......@@ -61,6 +61,7 @@ version <next>:
- FFprobe tool
- AMR-NB decoder
- RTSP muxer
- HE-AAC v1 decoder
......
......@@ -41,7 +41,7 @@ OBJS-$(CONFIG_VAAPI) += vaapi.o
OBJS-$(CONFIG_VDPAU) += vdpau.o
# decoders/encoders/hardware accelerators
OBJS-$(CONFIG_AAC_DECODER) += aac.o aactab.o
OBJS-$(CONFIG_AAC_DECODER) += aac.o aactab.o aacsbr.o
OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \
aacpsy.o aactab.o \
psymodel.o iirfilter.o \
......
......@@ -62,7 +62,7 @@
* N MIDI
* N Harmonic and Individual Lines plus Noise
* N Text-To-Speech Interface
* N (in progress) Spectral Band Replication
* Y Spectral Band Replication
* Y (not in this code) Layer-1
* Y (not in this code) Layer-2
* Y (not in this code) Layer-3
......@@ -86,6 +86,8 @@
#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "sbr.h"
#include "aacsbr.h"
#include "mpeg4audio.h"
#include "aac_parser.h"
......@@ -180,14 +182,18 @@ static av_cold int che_configure(AACContext *ac,
if (che_pos[type][id]) {
if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
return AVERROR(ENOMEM);
ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
if (type != TYPE_CCE) {
ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
if (type == TYPE_CPE) {
ac->output_data[(*channels)++] = ac->che[type][id]->ch[1].ret;
}
}
} else
} else {
if (ac->che[type][id])
ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
av_freep(&ac->che[type][id]);
}
return 0;
}
......@@ -530,6 +536,8 @@ static av_cold int aac_decode_init(AVCodecContext *avccontext)
AAC_INIT_VLC_STATIC( 9, 366);
AAC_INIT_VLC_STATIC(10, 462);
ff_aac_sbr_init();
dsputil_init(&ac->dsp, avccontext);
ac->random_state = 0x1f2e3d4c;
......@@ -1544,23 +1552,6 @@ static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
return 0;
}
/**
* Decode Spectral Band Replication extension data; reference: table 4.55.
*
* Placeholder implementation: no SBR data is actually decoded. It reports SBR
* as a missing feature and skips over the remainder of the payload.
*
* @param ac  decoder context; only ac->avccontext is used here, for logging
* @param gb  bitstream reader; advanced by 8 * cnt - 4 bits
* @param crc flag indicating the presence of CRC checksum (unused by this stub)
* @param cnt length of TYPE_FIL syntactic element in bytes
*
* @return Returns number of bytes consumed from the TYPE_FIL element (always cnt).
*/
static int decode_sbr_extension(AACContext *ac, GetBitContext *gb,
int crc, int cnt)
{
// TODO : sbr_extension implementation
av_log_missing_feature(ac->avccontext, "SBR", 0);
// Skip the whole payload: cnt bytes minus the 4 bits already read for the extension type.
skip_bits_long(gb, 8 * cnt - 4); // -4 due to reading extension type
return cnt;
}
/**
* Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
*
......@@ -1641,7 +1632,8 @@ static int decode_dynamic_range(DynamicRangeControl *che_drc,
*
* @return Returns number of bytes consumed
*/
static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
ChannelElement *che, enum RawDataBlockType elem_type)
{
int crc_flag = 0;
int res = cnt;
......@@ -1649,7 +1641,21 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt)
case EXT_SBR_DATA_CRC:
crc_flag++;
case EXT_SBR_DATA:
res = decode_sbr_extension(ac, gb, crc_flag, cnt);
if (!che) {
av_log(ac->avccontext, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
return res;
} else if (!ac->m4ac.sbr) {
av_log(ac->avccontext, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
skip_bits_long(gb, 8 * cnt - 4);
return res;
} else if (ac->m4ac.sbr == -1 && ac->output_configured == OC_LOCKED) {
av_log(ac->avccontext, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
skip_bits_long(gb, 8 * cnt - 4);
return res;
} else {
ac->m4ac.sbr = 1;
}
res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
break;
case EXT_DYNAMIC_RANGE:
res = decode_dynamic_range(&ac->che_drc, gb, cnt);
......@@ -1830,8 +1836,9 @@ static void apply_independent_coupling(AACContext *ac,
const float bias = ac->add_bias;
const float *src = cce->ch[0].ret;
float *dest = target->ret;
const int len = 1024 << (ac->m4ac.sbr == 1);
for (i = 0; i < 1024; i++)
for (i = 0; i < len; i++)
dest[i] += gain * (src[i] - bias);
}
......@@ -1889,10 +1896,18 @@ static void spectral_to_sample(AACContext *ac)
apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
if (type <= TYPE_CPE)
apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT)
if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
imdct_and_windowing(ac, &che->ch[0]);
if (type == TYPE_CPE)
if (ac->m4ac.sbr > 0) {
ff_sbr_dequant(ac, &che->sbr, type == TYPE_CPE ? TYPE_CPE : TYPE_SCE);
ff_sbr_apply(ac, &che->sbr, 0, che->ch[0].ret, che->ch[0].ret);
}
}
if (type == TYPE_CPE) {
imdct_and_windowing(ac, &che->ch[1]);
if (ac->m4ac.sbr > 0)
ff_sbr_apply(ac, &che->sbr, 1, che->ch[1].ret, che->ch[1].ret);
}
if (type <= TYPE_CCE)
apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
}
......@@ -1942,9 +1957,9 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
AACContext *ac = avccontext->priv_data;
ChannelElement *che = NULL;
ChannelElement *che = NULL, *che_prev = NULL;
GetBitContext gb;
enum RawDataBlockType elem_type;
enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
int err, elem_id, data_size_tmp;
int buf_consumed;
int samples = 1024, multiplier;
......@@ -2014,7 +2029,7 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
return -1;
}
while (elem_id > 0)
elem_id -= decode_extension_payload(ac, &gb, elem_id);
elem_id -= decode_extension_payload(ac, &gb, elem_id, che_prev, elem_type_prev);
err = 0; /* FIXME */
break;
......@@ -2023,6 +2038,9 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
break;
}
che_prev = che;
elem_type_prev = elem_type;
if (err)
return err;
......@@ -2034,14 +2052,14 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
spectral_to_sample(ac);
multiplier = 0;
multiplier = (ac->m4ac.sbr == 1) ? ac->m4ac.ext_sample_rate > ac->m4ac.sample_rate : 0;
samples <<= multiplier;
if (ac->output_configured < OC_LOCKED) {
avccontext->sample_rate = ac->m4ac.sample_rate << multiplier;
avccontext->frame_size = samples;
}
data_size_tmp = 1024 * avccontext->channels * sizeof(int16_t);
data_size_tmp = samples * avccontext->channels * sizeof(int16_t);
if (*data_size < data_size_tmp) {
av_log(avccontext, AV_LOG_ERROR,
"Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
......@@ -2050,7 +2068,7 @@ static int aac_decode_frame(AVCodecContext *avccontext, void *data,
}
*data_size = data_size_tmp;
ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avccontext->channels);
if (ac->output_configured)
ac->output_configured = OC_LOCKED;
......@@ -2065,8 +2083,11 @@ static av_cold int aac_decode_close(AVCodecContext *avccontext)
int i, type;
for (i = 0; i < MAX_ELEM_ID; i++) {
for (type = 0; type < 4; type++)
for (type = 0; type < 4; type++) {
if (ac->che[type][i])
ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
av_freep(&ac->che[type][i]);
}
}
ff_mdct_end(&ac->mdct);
......
......@@ -34,6 +34,7 @@
#include "dsputil.h"
#include "fft.h"
#include "mpeg4audio.h"
#include "sbr.h"
#include <stdint.h>
......@@ -217,7 +218,7 @@ typedef struct {
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED(16, float, saved)[1024]; ///< overlap
DECLARE_ALIGNED(16, float, ret)[1024]; ///< PCM output
DECLARE_ALIGNED(16, float, ret)[2048]; ///< PCM output
PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement;
......@@ -233,6 +234,7 @@ typedef struct {
SingleChannelElement ch[2];
// CCE specific
ChannelCoupling coup;
SpectralBandReplication sbr;
} ChannelElement;
/**
......
This diff is collapsed.
/*
* AAC Spectral Band Replication function declarations
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/aacsbr.h
* AAC Spectral Band Replication function declarations
* @author Robert Swain ( rob opendot cl )
*/
#ifndef AVCODEC_AACSBR_H
#define AVCODEC_AACSBR_H
#include "get_bits.h"
#include "aac.h"
#include "sbr.h"
/**
* Initialize SBR.
*
* Takes no context argument; per-element state is set up separately by
* ff_aac_sbr_ctx_init().
*/
av_cold void ff_aac_sbr_init(void);
/**
* Initialize one SBR context.
*
* @param sbr context to initialize
*/
av_cold void ff_aac_sbr_ctx_init(SpectralBandReplication *sbr);
/**
* Close one SBR context.
*
* The AAC decoder calls this before freeing the ChannelElement that embeds
* the context.
*
* @param sbr context to close
*/
av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
/**
* Decode one SBR element.
*
* @param ac     AAC decoder context
* @param sbr    SBR context of the channel element the extension data belongs to
* @param gb     bitstream reader positioned inside the extension payload
* @param crc    nonzero when the payload was signaled as EXT_SBR_DATA_CRC
* @param cnt    length of the extension payload in bytes, as passed on by the
*               AAC decoder
* @param id_aac type of the associated channel element; the AAC decoder passes
*               an enum RawDataBlockType value
*
* @return NOTE(review): presumably the number of bytes consumed, since the AAC
*         decoder returns this value as its own "bytes consumed" result -
*         confirm against the implementation.
*/
int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
GetBitContext *gb, int crc, int cnt, int id_aac);
/**
* Dequantize all channels in one SBR element.
*
* @param ac     AAC decoder context
* @param sbr    SBR context of the element
* @param id_aac element type; the AAC decoder passes TYPE_CPE or TYPE_SCE
*/
void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac);
/**
* Apply dequantized SBR to a single AAC channel.
*
* @param ac  AAC decoder context
* @param sbr SBR context of the element the channel belongs to
* @param ch  channel index within the element (the AAC decoder passes 0 or 1)
* @param in  input samples
* @param out output samples; the AAC decoder passes the same buffer as in
*/
void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int ch,
const float* in, float* out);
#endif /* AVCODEC_AACSBR_H */
This diff is collapsed.
......@@ -30,7 +30,7 @@
#include "libavutil/avutil.h"
#define LIBAVCODEC_VERSION_MAJOR 52
#define LIBAVCODEC_VERSION_MINOR 56
#define LIBAVCODEC_VERSION_MINOR 57
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
......
......@@ -57,7 +57,7 @@ enum AudioObjectType {
AOT_AAC_LC, ///< Y Low Complexity
AOT_AAC_SSR, ///< N (code in SoC repo) Scalable Sample Rate
AOT_AAC_LTP, ///< N (code in SoC repo) Long Term Prediction
AOT_SBR, ///< N (in progress) Spectral Band Replication
AOT_SBR, ///< Y Spectral Band Replication
AOT_AAC_SCALABLE, ///< N Scalable
AOT_TWINVQ, ///< N Twin Vector Quantizer
AOT_CELP, ///< N Code Excited Linear Prediction
......
/*
* Spectral Band Replication definitions and structures
* Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
* Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/sbr.h
* Spectral Band Replication definitions and structures
* @author Robert Swain ( rob opendot cl )
*/
#ifndef AVCODEC_SBR_H
#define AVCODEC_SBR_H
#include <stdint.h>
#include "fft.h"
/**
* Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header.
*
* Every field is stored in a single byte. One instance is kept in
* SpectralBandReplication::spectrum_params.
* NOTE(review): the bs_ prefix presumably marks values taken directly from the
* bitstream - confirm against the header parser.
*/
typedef struct {
uint8_t bs_start_freq;
uint8_t bs_stop_freq;
uint8_t bs_xover_band;
/**
* @defgroup bs_header_extra_1 Variables associated with bs_header_extra_1
* @{
*/
uint8_t bs_freq_scale;
uint8_t bs_alter_scale;
uint8_t bs_noise_bands;
/** @} */
} SpectrumParameters;
#define SBR_SYNTHESIS_BUF_SIZE ((1280-128)*2)
/**
* Spectral Band Replication per channel data
*
* SpectralBandReplication holds two of these (its data[2] member).
* The fields are split into values named after bitstream elements (bs_*)
* and state kept by the decoder.
*/
typedef struct {
/**
* @defgroup bitstream Main bitstream data variables
* @{
*/
unsigned bs_frame_class;
unsigned bs_add_harmonic_flag;
unsigned bs_num_env[2];
uint8_t bs_freq_res[7];
uint8_t bs_var_bord[2];
uint8_t bs_num_rel[2];
uint8_t bs_rel_bord[2][3];
unsigned bs_pointer;
unsigned bs_num_noise;
uint8_t bs_df_env[5];
uint8_t bs_df_noise[2];
uint8_t bs_invf_mode[2][5];
uint8_t bs_add_harmonic[48];
unsigned bs_amp_res;
/** @} */
/**
* @defgroup state State variables
* @{
*/
///Sample buffer for the synthesis filterbank, SBR_SYNTHESIS_BUF_SIZE floats, 16-byte aligned
DECLARE_ALIGNED(16, float, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
///Sample buffer for the analysis filterbank, 16-byte aligned
DECLARE_ALIGNED(16, float, analysis_filterbank_samples) [1312];
///NOTE(review): presumably the current position within synthesis_filterbank_samples - confirm
int synthesis_filterbank_samples_offset;
///l_APrev and l_A
int e_a[2];
///Chirp factors
float bw_array[5];
///QMF values of the original signal
float W[2][32][32][2];
///QMF output of the HF adjustor
float Y[2][38][64][2];
///NOTE(review): g_temp, q_temp and s_indexmapped are undocumented; their purpose is not visible here - confirm in aacsbr.c
float g_temp[42][48];
float q_temp[42][48];
uint8_t s_indexmapped[8][48];
///Envelope scalefactors
float env_facs[6][48];
///Noise scalefactors
float noise_facs[3][5];
///Envelope time borders
uint8_t t_env[8];
///Envelope time border of the last envelope of the previous frame
uint8_t t_env_num_env_old;
///Noise time borders
uint8_t t_q[3];
///NOTE(review): f_indexnoise and f_indexsine are undocumented; their purpose is not visible here - confirm in aacsbr.c
unsigned f_indexnoise;
unsigned f_indexsine;
/** @} */
} SBRData;
/**
* Spectral Band Replication
*
* Per-element SBR state. The AAC decoder embeds one instance in every
* ChannelElement (member sbr). Per-channel data lives in data[2]; the
* remaining members are shared by the channels of the element.
*/
typedef struct {
int sample_rate;
int start;
int reset;
///Parameters from the SBR header; see SpectrumParameters
SpectrumParameters spectrum_params;
int bs_amp_res_header;
/**
* @defgroup bs_header_extra_2 variables associated with bs_header_extra_2
* @{
*/
unsigned bs_limiter_bands;
unsigned bs_limiter_gains;
unsigned bs_interpol_freq;
unsigned bs_smoothing_mode;
/** @} */
unsigned bs_coupling;
///NOTE(review): the array has 5 entries but the comment only names k0, k1, k2 - confirm what k[3] and k[4] hold
unsigned k[5]; ///< k0, k1, k2
///kx', and kx respectively, kx is the first QMF subband where SBR is used.
///kx' is its value from the previous frame
unsigned kx[2];
///M' and M respectively, M is the number of QMF subbands that use SBR.
unsigned m[2];
///The number of frequency bands in f_master
unsigned n_master;
///Per-channel SBR data
SBRData data[2];
///N_Low and N_High respectively, the number of frequency bands for low and high resolution
unsigned n[2];
///Number of noise floor bands
unsigned n_q;
///Number of limiter bands
unsigned n_lim;
///The master QMF frequency grouping
uint16_t f_master[49];
///Frequency borders for low resolution SBR
uint16_t f_tablelow[25];
///Frequency borders for high resolution SBR
uint16_t f_tablehigh[49];
///Frequency borders for noise floors
uint16_t f_tablenoise[6];
///Frequency borders for the limiter
uint16_t f_tablelim[29];
///NOTE(review): presumably the number of valid entries in the two patch_* arrays below - confirm
unsigned num_patches;
uint8_t patch_num_subbands[6];
uint8_t patch_start_subband[6];
///QMF low frequency input to the HF generator
float X_low[32][40][2];
///QMF output of the HF generator
float X_high[64][40][2];
///QMF values of the reconstructed signal
DECLARE_ALIGNED(16, float, X)[2][32][64];
///Zeroth coefficient used to filter the subband signals
float alpha0[64][2];
///First coefficient used to filter the subband signals
float alpha1[64][2];
///Dequantized envelope scalefactors, remapped
float e_origmapped[7][48];
///Dequantized noise scalefactors, remapped
float q_mapped[7][48];
///Sinusoidal presence, remapped
uint8_t s_mapped[7][48];
///Estimated envelope
float e_curr[7][48];
///Amplitude adjusted noise scalefactors
float q_m[7][48];
///Sinusoidal levels
float s_m[7][48];
float gain[7][48];
///Scratch space for the QMF filters, 16-byte aligned
DECLARE_ALIGNED(16, float, qmf_filter_scratch)[5][64];
///Transform contexts (types declared in fft.h).
///NOTE(review): presumably set up by ff_aac_sbr_ctx_init() and released by ff_aac_sbr_ctx_close() - confirm in aacsbr.c
RDFTContext rdft;
FFTContext mdct;
} SpectralBandReplication;
#endif /* AVCODEC_SBR_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment