Commit d140b025 authored by Vitor Sessak

SIPR16k decoder

Originally committed as revision 21234 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent d79c06b2
......@@ -48,7 +48,7 @@ version <next>:
- R210 decoder
- Auravision Aura 1 and 2 decoders
- Deluxe Paint Animation playback system
- SIPR decoding for modes 8k5, 6k5 and 5k0
- SIPR decoder
- Adobe Filmstrip muxer and demuxer
......
......@@ -620,7 +620,6 @@ following image formats are supported:
@item RealAudio 3.0 (dnet) @tab IX @tab X
@tab Real low bitrate AC-3 codec
@item RealAudio SIPR / ACELP.NET @tab @tab X
@tab 16 kbps mode not yet supported
@item Shorten @tab @tab X
@item Sierra VMD audio @tab @tab X
@tab Used in Sierra VMD files.
......
......@@ -263,7 +263,8 @@ OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o
OBJS-$(CONFIG_SHORTEN_DECODER) += shorten.o
OBJS-$(CONFIG_SIPR_DECODER) += sipr.o acelp_pitch_delay.o \
celp_math.o acelp_vectors.o \
acelp_filters.o celp_filters.o lsp.o
acelp_filters.o celp_filters.o lsp.o \
sipr16k.o
OBJS-$(CONFIG_SMACKAUD_DECODER) += smacker.o
OBJS-$(CONFIG_SMACKER_DECODER) += smacker.o
OBJS-$(CONFIG_SMC_DECODER) += smc.o
......
......@@ -30,7 +30,7 @@
#include "libavutil/avutil.h"
#define LIBAVCODEC_VERSION_MAJOR 52
#define LIBAVCODEC_VERSION_MINOR 47
#define LIBAVCODEC_VERSION_MINOR 48
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
......
......@@ -51,6 +51,7 @@ typedef struct {
/* bitstream parameters */
uint8_t number_of_fc_indexes;
uint8_t ma_predictor_bits; ///< size in bits of the switched MA predictor
/** size in bits of the i-th stage vector of quantizer */
uint8_t vq_indexes_bits[5];
......@@ -64,6 +65,22 @@ typedef struct {
} SiprModeParam;
static const SiprModeParam modes[MODE_COUNT] = {
[MODE_16k] = {
.mode_name = "16k",
.bits_per_frame = 160,
.subframe_count = SUBFRAME_COUNT_16k,
.frames_per_packet = 1,
.pitch_sharp_factor = 0.00,
.number_of_fc_indexes = 10,
.ma_predictor_bits = 1,
.vq_indexes_bits = {7, 8, 7, 7, 7},
.pitch_delay_bits = {9, 6},
.gp_index_bits = 4,
.fc_index_bits = {4, 5, 4, 5, 4, 5, 4, 5, 4, 5},
.gc_index_bits = 5
},
[MODE_8k5] = {
.mode_name = "8k5",
.bits_per_frame = 152,
......@@ -72,6 +89,7 @@ static const SiprModeParam modes[MODE_COUNT] = {
.pitch_sharp_factor = 0.8,
.number_of_fc_indexes = 3,
.ma_predictor_bits = 0,
.vq_indexes_bits = {6, 7, 7, 7, 5},
.pitch_delay_bits = {8, 5, 5},
.gp_index_bits = 0,
......@@ -87,6 +105,7 @@ static const SiprModeParam modes[MODE_COUNT] = {
.pitch_sharp_factor = 0.8,
.number_of_fc_indexes = 3,
.ma_predictor_bits = 0,
.vq_indexes_bits = {6, 7, 7, 7, 5},
.pitch_delay_bits = {8, 5, 5},
.gp_index_bits = 0,
......@@ -102,6 +121,7 @@ static const SiprModeParam modes[MODE_COUNT] = {
.pitch_sharp_factor = 0.85,
.number_of_fc_indexes = 1,
.ma_predictor_bits = 0,
.vq_indexes_bits = {6, 7, 7, 7, 5},
.pitch_delay_bits = {8, 5, 8, 5, 5},
.gp_index_bits = 0,
......@@ -173,6 +193,8 @@ static void decode_parameters(SiprParameters* parms, GetBitContext *pgb,
{
int i, j;
parms->ma_pred_switch = get_bits(pgb, p->ma_predictor_bits);
for (i = 0; i < 5; i++)
parms->vq_indexes[i] = get_bits(pgb, p->vq_indexes_bits[i]);
......@@ -490,6 +512,9 @@ static av_cold int sipr_decoder_init(AVCodecContext * avctx)
av_log(avctx, AV_LOG_DEBUG, "Mode: %s\n", modes[ctx->mode].mode_name);
if (ctx->mode == MODE_16k)
ff_sipr_init_16k(ctx);
for (i = 0; i < LP_FILTER_ORDER; i++)
ctx->lsp_history[i] = cos((i+1) * M_PI / (LP_FILTER_ORDER + 1));
......@@ -498,12 +523,6 @@ static av_cold int sipr_decoder_init(AVCodecContext * avctx)
avctx->sample_fmt = SAMPLE_FMT_FLT;
if (ctx->mode == MODE_16k) {
av_log(avctx, AV_LOG_ERROR, "decoding 16kbps SIPR files is not "
"supported yet.\n");
return -1;
}
dsputil_init(&ctx->dsp, avctx);
return 0;
......@@ -518,6 +537,7 @@ static int sipr_decode_frame(AVCodecContext *avctx, void *datap,
const SiprModeParam *mode_par = &modes[ctx->mode];
GetBitContext gb;
float *data = datap;
int subframe_size = ctx->mode == MODE_16k ? L_SUBFR_16k : SUBFR_SIZE;
int i;
ctx->avctx = avctx;
......@@ -529,7 +549,7 @@ static int sipr_decode_frame(AVCodecContext *avctx, void *datap,
*data_size = 0;
return -1;
}
if (*data_size < SUBFR_SIZE * mode_par->subframe_count * sizeof(float)) {
if (*data_size < subframe_size * mode_par->subframe_count * sizeof(float)) {
av_log(avctx, AV_LOG_ERROR,
"Error processing packet: output buffer (%d) too small\n",
*data_size);
......@@ -542,12 +562,16 @@ static int sipr_decode_frame(AVCodecContext *avctx, void *datap,
for (i = 0; i < mode_par->frames_per_packet; i++) {
decode_parameters(&parm, &gb, mode_par);
decode_frame(ctx, &parm, data);
data += SUBFR_SIZE * mode_par->subframe_count;
if (ctx->mode == MODE_16k)
ff_sipr_decode_frame_16k(ctx, &parm, data);
else
decode_frame(ctx, &parm, data);
data += subframe_size * mode_par->subframe_count;
}
*data_size = mode_par->frames_per_packet * SUBFR_SIZE *
*data_size = mode_par->frames_per_packet * subframe_size *
mode_par->subframe_count * sizeof(float);
return mode_par->bits_per_frame >> 3;
......
......@@ -28,6 +28,11 @@
#include "dsputil.h"
#include "acelp_pitch_delay.h"
#define LP_FILTER_ORDER_16k 16
#define L_SUBFR_16k 80
#define PITCH_MIN 30
#define PITCH_MAX 281
#define LSFQ_DIFF_MIN (0.0125 * M_PI)
#define LP_FILTER_ORDER 10
......@@ -38,6 +43,8 @@
/** Subframe size for all modes except 16k */
#define SUBFR_SIZE 48
#define SUBFRAME_COUNT_16k 2
typedef enum {
MODE_16k,
MODE_8k5,
......@@ -53,9 +60,9 @@ typedef struct {
SiprMode mode;
float past_pitch_gain;
float lsf_history[LP_FILTER_ORDER];
float lsf_history[LP_FILTER_ORDER_16k];
float excitation[L_INTERPOL + PITCH_DELAY_MAX + 5*SUBFR_SIZE];
float excitation[L_INTERPOL + PITCH_MAX + 2 * L_SUBFR_16k];
DECLARE_ALIGNED_16(float, synth_buf[LP_FILTER_ORDER + 5*SUBFR_SIZE + 6]);
......@@ -70,9 +77,19 @@ typedef struct {
float postfilter_agc;
float postfilter_mem5k0[PITCH_DELAY_MAX + LP_FILTER_ORDER];
float postfilter_syn5k0[LP_FILTER_ORDER + SUBFR_SIZE*5];
/* 16k */
int pitch_lag_prev;
float iir_mem[LP_FILTER_ORDER_16k+1];
float filt_buf[2][LP_FILTER_ORDER_16k+1];
float *filt_mem[2];
float mem_preemph[LP_FILTER_ORDER_16k];
float synth[LP_FILTER_ORDER_16k];
double lsp_history_16k[16];
} SiprContext;
typedef struct {
int ma_pred_switch; ///< switched moving average predictor
int vq_indexes[5];
int pitch_delay[5]; ///< pitch delay
int gp_index[5]; ///< adaptive-codebook gain indexes
......@@ -80,4 +97,11 @@ typedef struct {
int gc_index[5]; ///< fixed-codebook gain indexes
} SiprParameters;
extern const float ff_pow_0_5[16];
void ff_sipr_init_16k(SiprContext *ctx);
void ff_sipr_decode_frame_16k(SiprContext *ctx, SiprParameters *params,
float *out_data);
#endif /* AVCODEC_SIPR_H */
/*
* SIPR decoder for the 16k mode
*
* Copyright (c) 2008 Vladimir Voroshilov
* Copyright (c) 2009 Vitor Sessak
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <math.h>
#include "sipr.h"
#include "libavutil/mathematics.h"
#include "lsp.h"
#include "celp_math.h"
#include "acelp_vectors.h"
#include "acelp_pitch_delay.h"
#include "acelp_filters.h"
#include "celp_filters.h"
#include "sipr16kdata.h"
/**
 * Convert an lsf vector into an lsp vector.
 *
 * Each of the LP_FILTER_ORDER_16k entries is replaced by its cosine. The
 * cosine is evaluated in single precision (cosf) and then widened to double.
 *
 * @param lsf input lsf vector (LP_FILTER_ORDER_16k entries are read)
 * @param lsp output lsp vector (LP_FILTER_ORDER_16k entries are written)
 */
static void lsf2lsp(const float *lsf, double *lsp)
{
    const float *const lsf_end = lsf + LP_FILTER_ORDER_16k;

    while (lsf < lsf_end)
        *lsp++ = cosf(*lsf++);
}
/**
 * Assemble a 16-entry vector from five codebook entries.
 *
 * Stages 0..3 each contribute a 3-float entry and stage 4 contributes a
 * 4-float entry; the entries are written back to back into out.
 *
 * @param out output vector (16 floats are written)
 * @param idx entry index to pick in each of the five codebooks
 * @param cbs the five codebooks, stored as consecutive 3-float entries
 *            (stages 0..3) or 4-float entries (stage 4)
 */
static void dequant(float *out, const int *idx, const float *cbs[])
{
    float *dst = out;
    int stage;

    for (stage = 0; stage < 5; stage++) {
        /* only the last stage uses 4-dimensional entries */
        const int dim = stage < 4 ? 3 : 4;

        memcpy(dst, cbs[stage] + dim * idx[stage], dim * sizeof(*dst));
        dst += dim;
    }
}
/**
 * Decode the frame's LSF vector from its codebook indexes.
 *
 * The dequantized vector is blended with the previous frame's dequantized
 * vector using the weight qu[ma_pred], and mean_lsf_16k is added on top.
 *
 * @param lsf_history dequantized vector of the previous frame; overwritten
 *                    with the dequantized vector of this frame
 * @param isp_new     output vector (LP_FILTER_ORDER_16k entries)
 * @param parm        the five codebook indexes
 * @param ma_pred     selects the blending weight in qu[]
 */
static void lsf_decode_fp_16k(float* lsf_history, float* isp_new,
                              const int* parm, int ma_pred)
{
    float quantized[LP_FILTER_ORDER_16k];
    int k;

    dequant(quantized, parm, lsf_codebooks_16k);

    for (k = 0; k < LP_FILTER_ORDER_16k; k++) {
        isp_new[k] = (1 - qu[ma_pred]) * quantized[k]
                   +      qu[ma_pred]  * lsf_history[k]
                   + mean_lsf_16k[k];
    }

    /* the raw dequantized vector (not the blended one) becomes the history */
    memcpy(lsf_history, quantized, sizeof(quantized));
}
/**
 * Decode the pitch delay index of the first subframe.
 *
 * Indexes below 390 map to consecutive values starting at 88; from 390 on
 * the result advances in steps of 3 (index 390 yields 480). The caller
 * treats the result as three times the pitch delay.
 *
 * @param index pitch delay index read from the bitstream
 * @return the decoded delay, scaled by 3
 */
static int dec_delay3_1st(int index)
{
    return index < 390 ? index + 88
                       : 3 * index - 690;
}
/**
 * Decode the pitch delay index of the second subframe.
 *
 * Indexes below 62 are offsets relative to a window that starts 10 below
 * the previous pitch lag, clipped to [pit_min, pit_max - 19]. Any other
 * index simply repeats the previous pitch lag.
 *
 * @param index          pitch delay index read from the bitstream
 * @param pit_min        lowest allowed window start
 * @param pit_max        highest pitch lag; the window start is kept at or
 *                       below pit_max - 19
 * @param pitch_lag_prev pitch lag of the previous subframe
 * @return the decoded delay, scaled by 3
 */
static int dec_delay3_2nd(int index, int pit_min, int pit_max,
                          int pitch_lag_prev)
{
    int window_start;

    if (index >= 62)
        return 3 * pitch_lag_prev;

    window_start = av_clip(pitch_lag_prev - 10, pit_min, pit_max - 19);

    return 3 * window_start + index - 2;
}
/**
 * Post-filter one decoded frame in place.
 *
 * The frame is filtered with coefficients derived from iir_mem. To avoid a
 * discontinuity at the frame start, the first 30 samples are also filtered
 * with the coefficients left over from the previous call, and the two
 * results are cross-faded linearly.
 *
 * @param synth       frame of 2*L_SUBFR_16k samples, filtered in place; the
 *                    LP_FILTER_ORDER_16k floats located just before synth
 *                    are overwritten with filter history
 * @param iir_mem     coefficients to weight (LP_FILTER_ORDER_16k are read)
 * @param filt_mem    two coefficient buffers: [0] receives the weighted
 *                    coefficients of this call, [1] holds those written by
 *                    the previous call; the pointers are swapped on exit
 * @param mem_preemph last LP_FILTER_ORDER_16k filtered samples of the
 *                    previous call; updated on exit
 */
static void postfilter(float* synth, float* iir_mem, float* filt_mem[2],
float* mem_preemph)
{
/* scratch output for the 30 cross-fade samples, preceded by room for the
 * LP_FILTER_ORDER_16k history samples */
float buf[30 + LP_FILTER_ORDER_16k];
float *tmpbuf = buf + LP_FILTER_ORDER_16k;
float s;
int i;
/* weight each coefficient with the matching ff_pow_0_5 entry
 * (presumably powers of 0.5 - confirm against the table definition) */
for (i = 0; i < LP_FILTER_ORDER_16k; i++)
filt_mem[0][i] = iir_mem[i] * ff_pow_0_5[i];
/* history is placed right before the output pointer; presumably
 * ff_celp_lp_synthesis_filterf() reads it from there - confirm */
memcpy(tmpbuf - LP_FILTER_ORDER_16k, mem_preemph,
LP_FILTER_ORDER_16k*sizeof(*buf));
/* first 30 samples, filtered with the previous call's coefficients */
ff_celp_lp_synthesis_filterf(tmpbuf, filt_mem[1], synth, 30,
LP_FILTER_ORDER_16k);
/* same history, now in front of the frame itself */
memcpy(synth - LP_FILTER_ORDER_16k, mem_preemph,
LP_FILTER_ORDER_16k * sizeof(*synth));
/* whole frame, filtered in place with the current coefficients */
ff_celp_lp_synthesis_filterf(synth, filt_mem[0], synth, 2*L_SUBFR_16k,
LP_FILTER_ORDER_16k);
/* keep the tail of the filtered frame as history for the next call */
memcpy(mem_preemph, synth + 2*L_SUBFR_16k - LP_FILTER_ORDER_16k,
LP_FILTER_ORDER_16k * sizeof(*synth));
/* the current coefficients become the "previous" ones of the next call */
FFSWAP(float *, filt_mem[0], filt_mem[1]);
/* linear cross-fade: s runs from 0 upwards in steps of 1/30, moving from
 * the old-coefficient output (tmpbuf) towards the new one (synth) */
for (i = 0, s = 0; i < 30; i++, s += 1.0/30)
synth[i] = tmpbuf[i] + s * (synth[i] - tmpbuf[i]);
}
/**
 * Floating point version of ff_acelp_lp_decode().
 *
 * @param lp_1st   output: LP coefficients for the first subframe
 * @param lp_2nd   output: LP coefficients for the second subframe
 * @param lsp_2nd  LSP vector of the current frame (used as-is for the
 *                 second subframe)
 * @param lsp_prev LSP vector of the previous frame
 */
static void acelp_lp_decodef(float *lp_1st, float *lp_2nd,
                             const double *lsp_2nd, const double *lsp_prev)
{
    const int half_order = LP_FILTER_ORDER_16k / 2;
    double lsp_mid[LP_FILTER_ORDER_16k];
    int k;

    /* First subframe: midpoint between the previous and the current LSPs
     * (3.2.5 of G.729, Equation 24) */
    for (k = 0; k < LP_FILTER_ORDER_16k; k++)
        lsp_mid[k] = (lsp_2nd[k] + lsp_prev[k]) * 0.5;

    ff_acelp_lspd2lpc(lsp_mid, lp_1st, half_order);

    /* Second subframe: the current LSPs unchanged (3.2.5 of G.729) */
    ff_acelp_lspd2lpc(lsp_2nd, lp_2nd, half_order);
}
/**
 * Floating point version of ff_acelp_decode_gain_code().
 *
 * Adds the dot product of quant_energy and ma_prediction_coeff to
 * mr_energy, converts the sum from dB to a linear factor
 * (10^(energy / 20)), scales it by gain_corr_factor and divides by the
 * square root of the energy of fc_v (offset by 0.01, which keeps the
 * divisor non-zero for an all-zero vector).
 *
 * @param gain_corr_factor    multiplier applied to the predicted gain
 * @param fc_v                fixed-codebook vector
 * @param mr_energy           base energy in dB
 * @param quant_energy        first operand of the prediction dot product
 * @param ma_prediction_coeff second operand of the prediction dot product
 * @param subframe_size       number of entries of fc_v
 * @param ma_pred_order       number of entries of the prediction operands
 * @return the resulting gain
 */
static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v,
                                     float mr_energy, const float *quant_energy,
                                     const float *ma_prediction_coeff,
                                     int subframe_size, int ma_pred_order)
{
    float predicted_db = mr_energy +
        ff_dot_productf(quant_energy, ma_prediction_coeff, ma_pred_order);

    return gain_corr_factor * exp(M_LN10 / 20. * predicted_db) /
           sqrt((0.01 + ff_dot_productf(fc_v, fc_v, subframe_size)));
}
/**
 * Divide by 3 with a multiplication and a shift.
 *
 * 10923 / 2^15 is marginally larger than 1/3 (3 * 10923 = 32769), so the
 * result equals x / 3 for 0 <= x < 32768; beyond that the rounding error
 * can add 1.
 */
#define DIVIDE_BY_3(x) ((x) * 10923 >> 15)
/**
 * Decode one frame of the 16k mode.
 *
 * Steps: decode the LSF vector and turn it into per-subframe LP
 * coefficients, rebuild the excitation of each subframe (pitch
 * contribution plus fixed-codebook contribution), run it through the
 * synthesis filter, post-filter the whole frame and write the clipped
 * samples to out_data.
 *
 * @param ctx      decoder context; its history buffers are read and updated
 * @param params   frame parameters already read from the bitstream
 * @param out_data output buffer, SUBFRAME_COUNT_16k * L_SUBFR_16k floats
 *                 are written
 */
void ff_sipr_decode_frame_16k(SiprContext *ctx, SiprParameters *params,
float *out_data)
{
int frame_size = SUBFRAME_COUNT_16k * L_SUBFR_16k;
/* leave LP_FILTER_ORDER_16k floats in front of synth for filter history */
float *synth = ctx->synth_buf + LP_FILTER_ORDER_16k;
float lsf_new[LP_FILTER_ORDER_16k];
double lsp_new[LP_FILTER_ORDER_16k];
/* one LP coefficient set per subframe */
float Az[2][LP_FILTER_ORDER_16k];
float fixed_vector[L_SUBFR_16k];
float pitch_fac, gain_code;
int i;
int pitch_delay_3x;
/* NOTE(review): 292 presumably equals L_INTERPOL + PITCH_MAX, the amount
 * of excitation history kept by the memmove() below - confirm */
float *excitation = ctx->excitation + 292;
lsf_decode_fp_16k(ctx->lsf_history, lsf_new, params->vq_indexes,
params->ma_pred_switch);
/* presumably enforces a minimum spacing between neighbouring LSFs - confirm */
ff_set_min_dist_lsf(lsf_new, LSFQ_DIFF_MIN / 2, LP_FILTER_ORDER_16k);
lsf2lsp(lsf_new, lsp_new);
/* Az[0]: average of previous and current LSPs, Az[1]: current LSPs */
acelp_lp_decodef(Az[0], Az[1], lsp_new, ctx->lsp_history_16k);
memcpy(ctx->lsp_history_16k, lsp_new, LP_FILTER_ORDER_16k * sizeof(double));
/* restore the synthesis filter history saved by the previous frame */
memcpy(synth - LP_FILTER_ORDER_16k, ctx->synth,
LP_FILTER_ORDER_16k * sizeof(*synth));
for (i = 0; i < SUBFRAME_COUNT_16k; i++) {
int i_subfr = i * L_SUBFR_16k;
AMRFixed f;
float gain_corr_factor;
int pitch_delay_int;
int pitch_delay_frac;
/* the first subframe carries an absolute delay, the second one a delay
 * relative to the previous pitch lag */
if (!i) {
pitch_delay_3x = dec_delay3_1st(params->pitch_delay[i]);
} else
pitch_delay_3x = dec_delay3_2nd(params->pitch_delay[i],
PITCH_MIN, PITCH_MAX,
ctx->pitch_lag_prev);
pitch_fac = gain_pitch_cb_16k[params->gp_index[i]];
/* the fixed-vector stage gets the gain capped at 1.0; the uncapped
 * pitch_fac is still used for the excitation sum below */
f.pitch_fac = FFMIN(pitch_fac, 1.0);
f.pitch_lag = DIVIDE_BY_3(pitch_delay_3x+1);
ctx->pitch_lag_prev = f.pitch_lag;
/* split the 3x-scaled delay into an integer part and a remainder
 * in the range 0..2 */
pitch_delay_int = DIVIDE_BY_3(pitch_delay_3x + 2);
pitch_delay_frac = pitch_delay_3x + 2 - 3*pitch_delay_int;
/* pitch contribution: interpolate the past excitation at the decoded delay */
ff_acelp_interpolatef(&excitation[i_subfr],
&excitation[i_subfr] - pitch_delay_int + 1,
sinc_win, 3, pitch_delay_frac + 1,
LP_FILTER_ORDER, L_SUBFR_16k);
/* fixed-codebook contribution, built into a zeroed vector */
memset(fixed_vector, 0, sizeof(fixed_vector));
ff_decode_10_pulses_35bits(params->fc_indexes[i], &f,
ff_fc_4pulses_8bits_tracks_13, 5, 4);
ff_set_fixed_vector(fixed_vector, &f, 1.0, L_SUBFR_16k);
gain_corr_factor = gain_cb_16k[params->gc_index[i]];
/* pred_16k and ctx->energy_history are only combined through a dot
 * product inside acelp_decode_gain_codef(), so their order is irrelevant */
gain_code = gain_corr_factor *
acelp_decode_gain_codef(sqrt(L_SUBFR_16k), fixed_vector,
19.0 - 15.0/(0.05*M_LN10/M_LN2),
pred_16k, ctx->energy_history,
L_SUBFR_16k, 2);
/* shift the two-entry history and store the new correction factor in dB */
ctx->energy_history[1] = ctx->energy_history[0];
ctx->energy_history[0] = 20.0 * log10f(gain_corr_factor);
/* excitation = pitch_fac * pitch contribution + gain_code * fixed vector
 * (presumed semantics of ff_weighted_vector_sumf - confirm) */
ff_weighted_vector_sumf(&excitation[i_subfr], &excitation[i_subfr],
fixed_vector, pitch_fac,
gain_code, L_SUBFR_16k);
ff_celp_lp_synthesis_filterf(synth + i_subfr, Az[i],
&excitation[i_subfr], L_SUBFR_16k,
LP_FILTER_ORDER_16k);
}
/* save the tail of the unpostfiltered synthesis as next frame's history */
memcpy(ctx->synth, synth + frame_size - LP_FILTER_ORDER_16k,
LP_FILTER_ORDER_16k * sizeof(*synth));
/* slide the excitation buffer back by one frame, keeping
 * L_INTERPOL + PITCH_MAX samples of history */
memmove(ctx->excitation, ctx->excitation + 2 * L_SUBFR_16k,
(L_INTERPOL+PITCH_MAX) * sizeof(float));
postfilter(synth, ctx->iir_mem, ctx->filt_mem, ctx->mem_preemph);
/* the second subframe's coefficients feed the next frame's postfilter */
memcpy(ctx->iir_mem, Az[1], LP_FILTER_ORDER_16k * sizeof(float));
/* presumably clips synth into [-1, 32767/32768] while copying it to
 * out_data - confirm against the vector_clipf contract */
ctx->dsp.vector_clipf(out_data, synth, -1, 32767./(1<<15), frame_size);
}
/**
 * Initialize the parts of the decoder context used only by the 16k mode.
 *
 * Sets the previous pitch lag to 180, points the two filter-coefficient
 * slots at their backing buffers and fills the LSP history with
 * cos(k * pi / (LP_FILTER_ORDER_16k + 1)) for k = 1..LP_FILTER_ORDER_16k.
 *
 * @param ctx decoder context to initialize
 */
void ff_sipr_init_16k(SiprContext *ctx)
{
    int k;

    ctx->pitch_lag_prev = 180;

    ctx->filt_mem[0] = ctx->filt_buf[0];
    ctx->filt_mem[1] = ctx->filt_buf[1];

    for (k = 1; k <= LP_FILTER_ORDER_16k; k++)
        ctx->lsp_history_16k[k - 1] = cos(k * M_PI/(LP_FILTER_ORDER_16k + 1));
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment