Commit 78e65cd7 authored by Alex Converse

Merge the AAC encoder from SoC svn. It is still considered experimental.

Originally committed as revision 19375 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 5e039e1b
......@@ -27,6 +27,7 @@ version <next>:
- Electronic Arts Madcow decoder
- DivX (XSUB) subtitle encoder
- nonfree libamr support for AMR-NB/WB decoding/encoding removed
- Experimental AAC encoder
......
......@@ -36,6 +36,7 @@ OBJS-$(CONFIG_VDPAU) += vdpau.o
# decoders/encoders/hardware accelerators
OBJS-$(CONFIG_AAC_DECODER) += aac.o aactab.o mpeg4audio.o aac_parser.o aac_ac3_parser.o
OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o aacpsy.o aactab.o psymodel.o iirfilter.o mdct.o fft.o mpeg4audio.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
OBJS-$(CONFIG_AC3_DECODER) += eac3dec.o ac3dec.o ac3tab.o ac3dec_data.o ac3.o
OBJS-$(CONFIG_AC3_ENCODER) += ac3enc.o ac3tab.o ac3.o
......
......@@ -116,6 +116,12 @@ typedef struct {
#define MAX_PREDICTORS 672
#define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to a 512-fold difference in scale
#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS 255 ///< scalefactor index maximum value
#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference
/**
* Individual Channel Stream
*/
......@@ -126,6 +132,7 @@ typedef struct {
int num_window_groups;
uint8_t group_len[8];
const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
int num_swb; ///< number of scalefactor window bands
int num_windows;
int tns_max_bands;
......@@ -165,6 +172,7 @@ typedef struct {
/**
 * Pulse data for one channel; the pos[] and amp[] arrays hold four entries each.
 * NOTE(review): field semantics are not visible in this hunk - confirm
 * against the AAC pulse_data() syntax before relying on the notes below.
 */
typedef struct {
int num_pulse; ///< presumably the number of valid entries in pos[]/amp[] - TODO confirm
int start; ///< presumably the scalefactor band where pulse data starts - TODO confirm
int pos[4]; ///< pulse positions (units/offset base not shown here)
int amp[4]; ///< pulse amplitudes
} Pulse;
......@@ -189,11 +197,14 @@ typedef struct {
typedef struct {
IndividualChannelStream ics;
TemporalNoiseShaping tns;
enum BandType band_type[120]; ///< band types
Pulse pulse;
enum BandType band_type[128]; ///< band types
int band_type_run_end[120]; ///< band type run end points
float sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT
DECLARE_ALIGNED_16(float, saved[512]); ///< overlap
DECLARE_ALIGNED_16(float, saved[1024]); ///< overlap
DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement;
......@@ -203,7 +214,9 @@ typedef struct {
*/
typedef struct {
// CPE specific
uint8_t ms_mask[120]; ///< Set if mid/side stereo is used for each scalefactor window band
int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder)
uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
// shared
SingleChannelElement ch[2];
// CCE specific
......
This diff is collapsed.
This diff is collapsed.
/*
* AAC encoder
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AACENC_H
#define AVCODEC_AACENC_H
#include "avcodec.h"
#include "put_bits.h"
#include "dsputil.h"
#include "aac.h"
#include "psymodel.h"
struct AACEncContext;
/**
 * Table of callbacks that make up one coefficient coding strategy.
 *
 * ff_aac_coders[] is an array of these and AACEncContext holds a pointer to
 * one in its coder field. The implementations are not part of this header;
 * the per-callback notes below are inferred from the names and signatures
 * only - TODO confirm against the coder source.
 */
typedef struct AACCoefficientsEncoder{
/* presumably picks quantizer/scalefactor values for one channel (sce) */
void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s,
SingleChannelElement *sce, const float lambda);
/* presumably codes band information for window `win` spanning `group_len` windows */
void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
int win, int group_len, const float lambda);
/* takes `size` input coefficients, a scale index and a codebook number, and a bit writer `pb` */
void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
int scale_idx, int cb, const float lambda);
/* presumably decides mid/side stereo usage for a channel pair element */
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
}AACCoefficientsEncoder;
extern AACCoefficientsEncoder ff_aac_coders[];
/**
 * AAC encoder context
 *
 * Holds the transform contexts, scratch buffers, psychoacoustic model state
 * and the selected coefficient coder for one encoder instance.
 */
typedef struct AACEncContext {
PutBitContext pb; ///< bit writer context (type from put_bits.h)
MDCTContext mdct1024; ///< long (1024 samples) frame transform context
MDCTContext mdct128; ///< short (128 samples) frame transform context
DSPContext dsp; ///< DSP helper context (type from dsputil.h)
DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
int16_t* samples; ///< saved preprocessed input
int samplerate_index; ///< MPEG-4 samplerate index
ChannelElement *cpe; ///< channel elements
FFPsyContext psy; ///< psychoacoustic model context (see psymodel.h)
struct FFPsyPreprocessContext* psypp; ///< opaque preprocessing context; ff_psy_preprocess_init() returns this type
AACCoefficientsEncoder *coder; ///< coefficient coder callbacks in use; presumably an entry of ff_aac_coders[] - TODO confirm
int cur_channel; ///< NOTE(review): presumably the index of the channel being processed - confirm in aacenc.c
int last_frame; ///< NOTE(review): presumably flags the final frame - confirm in aacenc.c
float lambda; ///< NOTE(review): presumably the value passed as `lambda` to the coder callbacks - confirm
DECLARE_ALIGNED_16(int, qcoefs[96][2]); ///< quantized coefficients
DECLARE_ALIGNED_16(float, scoefs[1024]); ///< scaled coefficients
} AACEncContext;
#endif /* AVCODEC_AACENC_H */
This diff is collapsed.
......@@ -195,7 +195,7 @@ void avcodec_register_all(void)
REGISTER_ENCDEC (ZMBV, zmbv);
/* audio codecs */
REGISTER_DECODER (AAC, aac);
REGISTER_ENCDEC (AAC, aac);
REGISTER_ENCDEC (AC3, ac3);
REGISTER_ENCDEC (ALAC, alac);
REGISTER_DECODER (APE, ape);
......
/*
* audio encoder psychoacoustic model
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "psymodel.h"
#include "iirfilter.h"
extern const FFPsyModel ff_aac_psy_model;
/**
 * Initialize a psychoacoustic model context.
 *
 * Stores the codec context, allocates zeroed storage for PSY_MAX_BANDS
 * bands per channel, takes private copies of the band-size pointer table
 * and of the band-count table (num_lens entries each), selects the model
 * that matches the codec ID and runs the model's optional init callback.
 *
 * @param ctx       model context to fill in
 * @param avctx     codec context
 * @param num_lens  number of entries in bands and num_bands
 * @param bands     table of pointers to band sizes; only the pointers are
 *                  copied, the tables they point to are not duplicated
 * @param num_bands number of bands for each entry of bands
 *
 * @return zero if successful, a negative value if not. When an allocation
 *         fails or no model exists for the codec, everything allocated here
 *         is released again and the buffer pointers are left NULL.
 */
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                        int num_lens,
                        const uint8_t **bands, const int* num_bands)
{
    ctx->avctx     = avctx;
    ctx->model     = NULL; /* never leave a stale/uninitialized model pointer behind */
    ctx->num_lens  = num_lens;
    ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
    ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
    ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
    if (!ctx->psy_bands || !ctx->bands || !ctx->num_bands)
        goto fail;

    memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     * num_lens);
    memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens);

    switch (ctx->avctx->codec_id) {
    case CODEC_ID_AAC:
        ctx->model = &ff_aac_psy_model;
        break;
    default:
        /* no psychoacoustic model is available for this codec */
        goto fail;
    }

    /* a failing model init keeps the buffers; ff_psy_end() releases them */
    if (ctx->model->init)
        return ctx->model->init(ctx);
    return 0;

fail:
    av_freep(&ctx->psy_bands);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    return -1;
}
/**
 * Ask the selected model for a window suggestion.
 *
 * All arguments are handed unchanged to the model's window callback and
 * its result is returned as is.
 */
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
                                      const int16_t *audio, const int16_t *la,
                                      int channel, int prev_type)
{
    FFPsyWindowInfo suggestion = ctx->model->window(ctx, audio, la,
                                                    channel, prev_type);

    return suggestion;
}
/**
 * Run the selected model's analysis for one channel.
 *
 * Thin dispatcher: every argument is forwarded unchanged to the model's
 * analyze callback.
 */
void ff_psy_set_band_info(FFPsyContext *ctx, int channel,
                          const float *coeffs, FFPsyWindowInfo *wi)
{
    (*ctx->model->analyze)(ctx, channel, coeffs, wi);
}
/**
 * Release everything owned by a psychoacoustic model context.
 *
 * Runs the model's optional end callback first (it may still need the
 * buffers), then frees the band tables and per-band storage and resets the
 * pointers to NULL.
 *
 * A context whose model pointer is NULL (no model was selected) is handled
 * without dereferencing it, so cleanup after a failed initialization is safe.
 * NOTE(review): this relies on the context having been zeroed before a
 * failed/never-run init - confirm against the caller.
 *
 * @param ctx model context
 */
av_cold void ff_psy_end(FFPsyContext *ctx)
{
    if (ctx->model && ctx->model->end)
        ctx->model->end(ctx);
    av_freep(&ctx->bands);
    av_freep(&ctx->num_bands);
    av_freep(&ctx->psy_bands);
}
/**
 * Private state of the audio preprocessing stage.
 *
 * Allocated zeroed by ff_psy_preprocess_init(); only an opaque forward
 * declaration of this struct is exposed in psymodel.h.
 */
typedef struct FFPsyPreprocessContext{
AVCodecContext *avctx; ///< codec context given to ff_psy_preprocess_init()
float stereo_att; ///< NOTE(review): not read or written by the code in this file as shown - purpose unconfirmed
struct FFIIRFilterCoeffs *fcoeffs; ///< lowpass Butterworth coefficients from ff_iir_filter_init_coeffs(), or NULL
struct FFIIRFilterState **fstate; ///< one filter state per channel; NULL when no filtering is done
}FFPsyPreprocessContext;
/** order passed to both ff_iir_filter_init_coeffs() and ff_iir_filter_init_state() */
#define FILT_ORDER 4
/**
 * Create the audio preprocessing context.
 *
 * A lowpass Butterworth filter of order FILT_ORDER is set up. Its cutoff
 * coefficient is derived from global_quality when CODEC_FLAG_QSCALE is set,
 * otherwise from bit_rate relative to sample_rate and the channel count.
 *
 * If the filter coefficients or any filter state cannot be created, the
 * context is still returned but with fstate == NULL, which makes
 * ff_psy_preprocess() fall back to a plain copy of the samples.
 *
 * @param avctx codec context
 * @return the new context, or NULL if the context itself cannot be allocated
 */
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx)
{
    FFPsyPreprocessContext *ctx;
    int i;
    float cutoff_coeff;

    ctx = av_mallocz(sizeof(FFPsyPreprocessContext));
    if (!ctx)
        return NULL;
    ctx->avctx = avctx;

    if (avctx->flags & CODEC_FLAG_QSCALE)
        cutoff_coeff = 1.0f / av_clip(1 + avctx->global_quality / FF_QUALITY_SCALE, 1, 8);
    else
        cutoff_coeff = avctx->bit_rate / (4.0f * avctx->sample_rate * avctx->channels);

    ctx->fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS,
                                             FILT_ORDER, cutoff_coeff, 0.0, 0.0);
    if (!ctx->fcoeffs)
        return ctx;

    ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
    if (!ctx->fstate)
        return ctx; /* filtering disabled; fcoeffs is released by ff_psy_preprocess_end() */

    for (i = 0; i < avctx->channels; i++) {
        ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
        if (!ctx->fstate[i]) {
            /* presumably NULL signals allocation failure - never hand a
             * NULL state to ff_iir_filter(); drop filtering altogether */
            while (i-- > 0)
                ff_iir_filter_free_state(ctx->fstate[i]);
            av_freep(&ctx->fstate);
            break;
        }
    }
    return ctx;
}
/**
 * Preprocess `channels` channels of one interleaved frame.
 *
 * Samples are addressed with a stride of avctx->channels. When filter
 * states exist, channel ch is run through the IIR filter using the state
 * at index tag + ch; otherwise the samples are copied to dest unchanged.
 *
 * @param ctx      preprocessing context
 * @param audio    interleaved input samples
 * @param dest     interleaved output samples
 * @param tag      index of the filter state belonging to the first channel
 * @param channels number of consecutive channels to process
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
                       const int16_t *audio, int16_t *dest,
                       int tag, int channels)
{
    int c, n;

    if (!ctx->fstate) {
        /* no filter available: straight copy of the selected channels */
        for (c = 0; c < channels; c++) {
            for (n = 0; n < ctx->avctx->frame_size; n++) {
                const int idx = n * ctx->avctx->channels + c;

                dest[idx] = audio[idx];
            }
        }
        return;
    }

    for (c = 0; c < channels; c++)
        ff_iir_filter(ctx->fcoeffs, ctx->fstate[tag + c], ctx->avctx->frame_size,
                      audio + c, ctx->avctx->channels,
                      dest  + c, ctx->avctx->channels);
}
/**
 * Destroy a preprocessing context created by ff_psy_preprocess_init().
 *
 * Frees the filter coefficients, every per-channel filter state, the state
 * array and finally the context itself, which was allocated by
 * ff_psy_preprocess_init() and previously leaked. The pointer must not be
 * used or freed again by the caller afterwards.
 *
 * @param ctx preprocessing context; NULL is accepted and ignored
 */
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
{
    int i;

    if (!ctx)
        return;

    ff_iir_filter_free_coeffs(ctx->fcoeffs);
    if (ctx->fstate)
        for (i = 0; i < ctx->avctx->channels; i++)
            ff_iir_filter_free_state(ctx->fstate[i]);
    av_freep(&ctx->fstate);
    /* release the context block itself (only the local copy of the pointer is reset) */
    av_freep(&ctx);
}
/*
* audio encoder psychoacoustic model
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_PSYMODEL_H
#define AVCODEC_PSYMODEL_H
#include "avcodec.h"
/** maximum possible number of bands */
#define PSY_MAX_BANDS 128
/**
 * single band psychoacoustic information
 *
 * ff_psy_init() allocates PSY_MAX_BANDS of these per channel.
 * ff_psy_set_band_info() is documented as setting threshold and energy;
 * who fills in the remaining fields is not visible in this header.
 */
typedef struct FFPsyBand{
int bits; ///< NOTE(review): presumably a bit count/estimate for the band - confirm against the model
float energy; ///< band energy
float threshold; ///< band threshold
float distortion; ///< NOTE(review): presumably distortion measured for the band - confirm against the model
float perceptual_weight; ///< NOTE(review): presumably a weighting factor for the band - confirm against the model
}FFPsyBand;
/**
 * windowing related information
 *
 * Returned by value from ff_psy_suggest_window() and passed to
 * ff_psy_set_band_info().
 */
typedef struct FFPsyWindowInfo{
int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
int window_shape; ///< window shape (sine/KBD/whatever)
int num_windows; ///< number of windows in a frame
int grouping[8]; ///< window grouping (e.g. for AAC)
int *window_sizes; ///< sequence of window sizes inside one frame (e.g. for WMA)
}FFPsyWindowInfo;
/**
 * context used by psychoacoustic model
 *
 * Filled in by ff_psy_init() and released by ff_psy_end().
 */
typedef struct FFPsyContext{
AVCodecContext *avctx; ///< encoder context
const struct FFPsyModel *model; ///< encoder-specific model functions
FFPsyBand *psy_bands; ///< frame bands information, PSY_MAX_BANDS entries per channel
uint8_t **bands; ///< scalefactor band sizes for possible frame sizes (copy of the caller's pointer table)
int *num_bands; ///< number of scalefactor bands for possible frame sizes
int num_lens; ///< number of scalefactor band sets
void* model_priv_data; ///< psychoacoustic model implementation private data
}FFPsyContext;
/**
 * codec-specific psychoacoustic model implementation
 *
 * The ff_psy_* functions dispatch to these callbacks. init and end are
 * optional (checked for NULL before being called); window and analyze are
 * called unconditionally and therefore must be set.
 */
typedef struct FFPsyModel {
const char *name; ///< model name
int (*init) (FFPsyContext *apc); ///< optional; its return value becomes the result of ff_psy_init()
FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); ///< called by ff_psy_suggest_window()
void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi); ///< called by ff_psy_set_band_info()
void (*end) (FFPsyContext *apc); ///< optional; called by ff_psy_end() before the context buffers are freed
}FFPsyModel;
/**
* Initialize psychoacoustic model.
*
* @param ctx model context
* @param avctx codec context
* @param num_lens number of possible frame lengths
* @param bands scalefactor band lengths for all frame lengths
* @param num_bands number of scalefactor bands for all frame lengths
*
* @return zero if successful, a negative value if not
*/
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
int num_lens,
const uint8_t **bands, const int* num_bands);
/**
* Suggest window sequence for channel.
*
* @param ctx model context
* @param audio samples for the current frame
* @param la lookahead samples (NULL when unavailable)
* @param channel number of channel element to analyze
* @param prev_type previous window type
*
* @return suggested window information in a structure
*/
FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx,
const int16_t *audio, const int16_t *la,
int channel, int prev_type);
/**
* Perform psychoacoustic analysis and set band info (threshold, energy).
*
* @param ctx model context
* @param channel audio channel number
* @param coeffs pointer to the transformed coefficients
* @param wi window information
*/
void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs,
FFPsyWindowInfo *wi);
/**
* Cleanup model context at the end.
*
* @param ctx model context
*/
av_cold void ff_psy_end(FFPsyContext *ctx);
/**************************************************************************
* Audio preprocessing stuff. *
* This should be moved into some audio filter eventually. *
**************************************************************************/
struct FFPsyPreprocessContext;
/**
 * psychoacoustic model audio preprocessing initialization
 *
 * @param avctx codec context; its flags, global_quality, bit_rate,
 *              sample_rate and channels are used to set up the lowpass filter
 *
 * @return newly allocated preprocessing context
 */
av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx);
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * Samples in audio and dest are interleaved with a stride of the codec
 * context's channel count.
 *
 * @param ctx preprocessing context
 * @param audio samples to preprocess
 * @param dest place to put filtered samples
 * @param tag number of the first channel to process (selects its filter state)
 * @param channels number of channels to preprocess (some additional work may be done on stereo pair)
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx,
const int16_t *audio, int16_t *dest,
int tag, int channels);
/**
 * Cleanup audio preprocessing module.
 *
 * @param ctx preprocessing context returned by ff_psy_preprocess_init()
 */
av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
#endif /* AVCODEC_PSYMODEL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment