Commit 99497b46 authored by Michael Niedermayer

Merge commit '9a9e2f1c'

* commit '9a9e2f1c':
  dsputil: Split audio operations off into a separate context

Conflicts:
	configure
	libavcodec/takdec.c
	libavcodec/x86/Makefile
	libavcodec/x86/dsputil.asm
	libavcodec/x86/dsputil_init.c
	libavcodec/x86/dsputil_mmx.c
	libavcodec/x86/dsputil_x86.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 0dae193d 9a9e2f1c
...@@ -1795,6 +1795,7 @@ CONFIG_EXTRA=" ...@@ -1795,6 +1795,7 @@ CONFIG_EXTRA="
aandcttables aandcttables
ac3dsp ac3dsp
audio_frame_queue audio_frame_queue
audiodsp
blockdsp blockdsp
cabac cabac
dsputil dsputil
...@@ -2004,8 +2005,8 @@ aac_encoder_select="audio_frame_queue mdct sinewin" ...@@ -2004,8 +2005,8 @@ aac_encoder_select="audio_frame_queue mdct sinewin"
aac_latm_decoder_select="aac_decoder aac_latm_parser" aac_latm_decoder_select="aac_decoder aac_latm_parser"
ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" ac3_decoder_select="mdct ac3dsp ac3_parser dsputil"
ac3_fixed_decoder_select="mdct ac3dsp ac3_parser dsputil" ac3_fixed_decoder_select="mdct ac3dsp ac3_parser dsputil"
ac3_encoder_select="mdct ac3dsp dsputil" ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
ac3_fixed_encoder_select="mdct ac3dsp dsputil" ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
aic_decoder_select="dsputil golomb" aic_decoder_select="dsputil golomb"
alac_encoder_select="lpc" alac_encoder_select="lpc"
als_decoder_select="dsputil" als_decoder_select="dsputil"
...@@ -2028,7 +2029,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin" ...@@ -2028,7 +2029,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin"
cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
cllc_decoder_select="dsputil" cllc_decoder_select="dsputil"
comfortnoise_encoder_select="lpc" comfortnoise_encoder_select="lpc"
cook_decoder_select="dsputil mdct sinewin" cook_decoder_select="audiodsp mdct sinewin"
cscd_decoder_select="lzo" cscd_decoder_select="lzo"
cscd_decoder_suggest="zlib" cscd_decoder_suggest="zlib"
dca_decoder_select="mdct" dca_decoder_select="mdct"
...@@ -2150,7 +2151,7 @@ svq1_decoder_select="hpeldsp" ...@@ -2150,7 +2151,7 @@ svq1_decoder_select="hpeldsp"
svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
svq3_decoder_select="h264_decoder hpeldsp tpeldsp" svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
svq3_decoder_suggest="zlib" svq3_decoder_suggest="zlib"
tak_decoder_select="dsputil" tak_decoder_select="audiodsp"
theora_decoder_select="vp3_decoder" theora_decoder_select="vp3_decoder"
thp_decoder_select="mjpeg_decoder" thp_decoder_select="mjpeg_decoder"
tiff_decoder_suggest="zlib" tiff_decoder_suggest="zlib"
......
...@@ -33,6 +33,7 @@ OBJS = allcodecs.o \ ...@@ -33,6 +33,7 @@ OBJS = allcodecs.o \
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o
OBJS-$(CONFIG_AUDIODSP) += audiodsp.o
OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
OBJS-$(CONFIG_CABAC) += cabac.o OBJS-$(CONFIG_CABAC) += cabac.o
OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "avcodec.h" #include "avcodec.h"
#include "put_bits.h" #include "put_bits.h"
#include "audiodsp.h"
#include "ac3dsp.h" #include "ac3dsp.h"
#include "ac3.h" #include "ac3.h"
#include "fft.h" #include "fft.h"
...@@ -2478,6 +2479,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) ...@@ -2478,6 +2479,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
if (ret) if (ret)
goto init_fail; goto init_fail;
ff_audiodsp_init(&s->adsp);
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "fft.h" #include "fft.h"
#include "mathops.h" #include "mathops.h"
#include "put_bits.h" #include "put_bits.h"
#include "audiodsp.h"
#ifndef CONFIG_AC3ENC_FLOAT #ifndef CONFIG_AC3ENC_FLOAT
#define CONFIG_AC3ENC_FLOAT 0 #define CONFIG_AC3ENC_FLOAT 0
...@@ -162,6 +163,7 @@ typedef struct AC3EncodeContext { ...@@ -162,6 +163,7 @@ typedef struct AC3EncodeContext {
AVCodecContext *avctx; ///< parent AVCodecContext AVCodecContext *avctx; ///< parent AVCodecContext
PutBitContext pb; ///< bitstream writer context PutBitContext pb; ///< bitstream writer context
DSPContext dsp; DSPContext dsp;
AudioDSPContext adsp;
AVFloatDSPContext fdsp; AVFloatDSPContext fdsp;
AC3DSPContext ac3dsp; ///< AC-3 optimized functions AC3DSPContext ac3dsp; ///< AC-3 optimized functions
FFTContext mdct; ///< FFT context for MDCT calculation FFTContext mdct; ///< FFT context for MDCT calculation
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define FFT_FLOAT 0 #define FFT_FLOAT 0
#undef CONFIG_AC3ENC_FLOAT #undef CONFIG_AC3ENC_FLOAT
#include "internal.h" #include "internal.h"
#include "audiodsp.h"
#include "ac3enc.h" #include "ac3enc.h"
#include "eac3enc.h" #include "eac3enc.h"
...@@ -111,9 +112,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, int64_t sum[4], ...@@ -111,9 +112,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, int64_t sum[4],
/* /*
* Clip MDCT coefficients to allowable range. * Clip MDCT coefficients to allowable range.
*/ */
static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef,
unsigned int len)
{ {
dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define CONFIG_AC3ENC_FLOAT 1 #define CONFIG_AC3ENC_FLOAT 1
#include "internal.h" #include "internal.h"
#include "audiodsp.h"
#include "ac3enc.h" #include "ac3enc.h"
#include "eac3enc.h" #include "eac3enc.h"
#include "kbdwin.h" #include "kbdwin.h"
...@@ -117,9 +118,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4], ...@@ -117,9 +118,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
/* /*
* Clip MDCT coefficients to allowable range. * Clip MDCT coefficients to allowable range.
*/ */
static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) static void clip_coefficients(AudioDSPContext *adsp, float *coef,
unsigned int len)
{ {
dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
} }
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "audiodsp.h"
#include "internal.h" #include "internal.h"
#include "ac3enc.h" #include "ac3enc.h"
#include "eac3enc.h" #include "eac3enc.h"
...@@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s); ...@@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s);
static int normalize_samples(AC3EncodeContext *s); static int normalize_samples(AC3EncodeContext *s);
static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef,
unsigned int len);
static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl); static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl);
...@@ -164,7 +167,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) ...@@ -164,7 +167,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
} }
/* coefficients must be clipped in order to be encoded */ /* coefficients must be clipped in order to be encoded */
clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); clip_coefficients(&s->adsp, cpl_coef, num_cpl_coefs);
} }
/* calculate energy in each band in coupling channel and each fbw channel */ /* calculate energy in each band in coupling channel and each fbw channel */
...@@ -407,7 +410,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt, ...@@ -407,7 +410,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
if (s->fixed_point) if (s->fixed_point)
scale_coefficients(s); scale_coefficients(s);
clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1],
AC3_MAX_COEFS * s->num_blocks * s->channels); AC3_MAX_COEFS * s->num_blocks * s->channels);
s->cpl_on = s->cpl_enabled; s->cpl_on = s->cpl_enabled;
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "avcodec.h" #include "avcodec.h"
#include "acelp_pitch_delay.h" #include "acelp_pitch_delay.h"
#include "celp_math.h" #include "celp_math.h"
#include "audiodsp.h"
int ff_acelp_decode_8bit_to_1st_delay3(int ac_index) int ff_acelp_decode_8bit_to_1st_delay3(int ac_index)
{ {
...@@ -91,7 +92,7 @@ void ff_acelp_update_past_gain( ...@@ -91,7 +92,7 @@ void ff_acelp_update_past_gain(
} }
int16_t ff_acelp_decode_gain_code( int16_t ff_acelp_decode_gain_code(
DSPContext *dsp, AudioDSPContext *adsp,
int gain_corr_factor, int gain_corr_factor,
const int16_t* fc_v, const int16_t* fc_v,
int mr_energy, int mr_energy,
...@@ -118,7 +119,7 @@ int16_t ff_acelp_decode_gain_code( ...@@ -118,7 +119,7 @@ int16_t ff_acelp_decode_gain_code(
); );
#else #else
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
return mr_energy >> 12; return mr_energy >> 12;
#endif #endif
} }
......
...@@ -24,7 +24,8 @@ ...@@ -24,7 +24,8 @@
#define AVCODEC_ACELP_PITCH_DELAY_H #define AVCODEC_ACELP_PITCH_DELAY_H
#include <stdint.h> #include <stdint.h>
#include "dsputil.h"
#include "audiodsp.h"
#define PITCH_DELAY_MIN 20 #define PITCH_DELAY_MIN 20
#define PITCH_DELAY_MAX 143 #define PITCH_DELAY_MAX 143
...@@ -139,7 +140,7 @@ void ff_acelp_update_past_gain( ...@@ -139,7 +140,7 @@ void ff_acelp_update_past_gain(
/** /**
* @brief Decode the adaptive codebook gain and add * @brief Decode the adaptive codebook gain and add
* correction (4.1.5 and 3.9.1 of G.729). * correction (4.1.5 and 3.9.1 of G.729).
* @param dsp initialized dsputil context * @param adsp initialized audio DSP context
* @param gain_corr_factor gain correction factor (2.13) * @param gain_corr_factor gain correction factor (2.13)
* @param fc_v fixed-codebook vector (2.13) * @param fc_v fixed-codebook vector (2.13)
* @param mr_energy mean innovation energy and fixed-point correction (7.13) * @param mr_energy mean innovation energy and fixed-point correction (7.13)
...@@ -208,7 +209,7 @@ void ff_acelp_update_past_gain( ...@@ -208,7 +209,7 @@ void ff_acelp_update_past_gain(
* @remark The routine is used in G.729 and AMR (all modes). * @remark The routine is used in G.729 and AMR (all modes).
*/ */
int16_t ff_acelp_decode_gain_code( int16_t ff_acelp_decode_gain_code(
DSPContext *dsp, AudioDSPContext *adsp,
int gain_corr_factor, int gain_corr_factor,
const int16_t* fc_v, const int16_t* fc_v,
int mr_energy, int mr_energy,
......
...@@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o ...@@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o arm/ac3dsp_arm.o
OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
arm/dsputil_arm.o \ arm/dsputil_arm.o \
...@@ -80,11 +81,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ ...@@ -80,11 +81,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
NEON-OBJS += arm/fmtconvert_neon.o NEON-OBJS += arm/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \
arm/audiodsp_neon.o \
arm/int_neon.o
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
arm/blockdsp_neon.o arm/blockdsp_neon.o
NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
arm/dsputil_neon.o \ arm/dsputil_neon.o \
arm/int_neon.o \
arm/simple_idct_neon.o arm/simple_idct_neon.o
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o arm/fft_fixed_neon.o
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_ARM_AUDIODSP_ARM_H
#define AVCODEC_ARM_AUDIODSP_ARM_H
#include "libavcodec/audiodsp.h"
void ff_audiodsp_init_neon(AudioDSPContext *c);
#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */
/*
* ARM optimized audio functions
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/audiodsp.h"
#include "audiodsp_arm.h"
/**
 * Install the ARM-specific audio DSP routines into the context.
 *
 * The CPU flags are queried once; when they report NEON support the NEON
 * initializer is invoked, otherwise the context is left untouched.
 *
 * @param c audio DSP context to update
 */
av_cold void ff_audiodsp_init_arm(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    ff_audiodsp_init_neon(c);
}
/*
* ARM NEON optimised audio functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavcodec/audiodsp.h"
#include "audiodsp_arm.h"
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
/**
 * Point every member of the audio DSP context at its NEON implementation.
 *
 * @param c audio DSP context to update
 */
av_cold void ff_audiodsp_init_neon(AudioDSPContext *c)
{
    c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
    c->vector_clipf        = ff_vector_clipf_neon;
    c->vector_clip_int32   = ff_vector_clip_int32_neon;
}
/*
* ARM NEON optimised audio functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
@ ff_vector_clipf_neon: clamp floats read through r1 against two bounds and
@ store the results through r0, eight elements per loop iteration.
@ The vmin operand (q1) acts as the upper bound and the vmax operand (q0) as
@ the lower bound.
@ All loads and stores carry a 128-bit alignment hint and post-increment
@ their pointer.
@ The loop leaves only when the counter in r2 reaches exactly zero (beq), and
@ the first eight elements are loaded unconditionally, so the length has to
@ be a positive multiple of 8.
@ NOTE(review): VFP/NOVFP are macros from asm.S (not visible here);
@ presumably they select between bounds passed in d0 (hard-float) and bounds
@ passed in r2/r3 with the length on the stack (soft-float) - confirm.
function ff_vector_clipf_neon, export=1
@ Broadcast the two bounds into q0 / q1.
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
NOVFP vdup.32 q0, r2
NOVFP vdup.32 q1, r3
@ In the NOVFP variant r2 held a bound, so the length is fetched from the stack.
NOVFP ldr r2, [sp]
@ Prologue: load the first eight floats and apply the upper bound.
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
@ Loop head: apply the lower bound to the eight values currently in flight.
1: vmax.f32 q8, q10, q0
vmax.f32 q9, q11, q0
subs r2, r2, #8
@ Nothing left to preload: jump to the final store.
beq 2f
@ Preload and upper-clamp the next eight values before storing the current
@ ones (software pipelining).
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
b 1b
@ Epilogue: store the last eight clamped values.
2: vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
bx lr
endfunc
@ ff_vector_clip_int32_neon: clamp signed 32-bit integers read through r1
@ against two bounds and store the results through r0, eight elements per
@ loop iteration.
@ The value arriving in r2 is used as the lower bound (vmax operand) and the
@ value arriving in r3 as the upper bound (vmin operand).
@ Loads and stores carry a 128-bit alignment hint and post-increment their
@ pointer.
@ The body runs once before the counter is tested, so at least eight
@ elements are always processed.
function ff_vector_clip_int32_neon, export=1
@ Broadcast the bounds: q0 = value from r2, q1 = value from r3.
vdup.32 q0, r2
vdup.32 q1, r3
@ r2 is reused as the element counter, fetched from the stack.
ldr r2, [sp]
1:
vld1.32 {q2-q3}, [r1,:128]!
@ Apply the upper bound, then the lower bound.
vmin.s32 q2, q2, q1
vmin.s32 q3, q3, q1
vmax.s32 q2, q2, q0
vmax.s32 q3, q3, q0
vst1.32 {q2-q3}, [r0,:128]!
@ Eight elements done; loop while the remaining count is positive.
subs r2, r2, #8
bgt 1b
bx lr
endfunc
...@@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); ...@@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth) unsigned high_bit_depth)
{ {
...@@ -58,9 +51,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, ...@@ -58,9 +51,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
c->add_pixels_clamped = ff_add_pixels_clamped_neon; c->add_pixels_clamped = ff_add_pixels_clamped_neon;
c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
} }
...@@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1 ...@@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1
vst1.8 {d6}, [r3,:64], r2 vst1.8 {d6}, [r3,:64], r2
bx lr bx lr
endfunc endfunc
@ ff_vector_clipf_neon: clamp floats read through r1 against two bounds and
@ store the results through r0, eight elements per loop iteration.
@ The vmin operand (q1) acts as the upper bound and the vmax operand (q0) as
@ the lower bound.
@ All loads and stores carry a 128-bit alignment hint and post-increment
@ their pointer.
@ The loop leaves only when the counter in r2 reaches exactly zero (beq), and
@ the first eight elements are loaded unconditionally, so the length has to
@ be a positive multiple of 8.
@ NOTE(review): VFP/NOVFP are macros from asm.S (not visible here);
@ presumably they select between bounds passed in d0 (hard-float) and bounds
@ passed in r2/r3 with the length on the stack (soft-float) - confirm.
function ff_vector_clipf_neon, export=1
@ Broadcast the two bounds into q0 / q1.
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
NOVFP vdup.32 q0, r2
NOVFP vdup.32 q1, r3
@ In the NOVFP variant r2 held a bound, so the length is fetched from the stack.
NOVFP ldr r2, [sp]
@ Prologue: load the first eight floats and apply the upper bound.
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
@ Loop head: apply the lower bound to the eight values currently in flight.
1: vmax.f32 q8, q10, q0
vmax.f32 q9, q11, q0
subs r2, r2, #8
@ Nothing left to preload: jump to the final store.
beq 2f
@ Preload and upper-clamp the next eight values before storing the current
@ ones (software pipelining).
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
b 1b
@ Epilogue: store the last eight clamped values.
2: vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
bx lr
endfunc
@ ff_vector_clip_int32_neon: clamp signed 32-bit integers read through r1
@ against two bounds and store the results through r0, eight elements per
@ loop iteration.
@ The value arriving in r2 is used as the lower bound (vmax operand) and the
@ value arriving in r3 as the upper bound (vmin operand).
@ Loads and stores carry a 128-bit alignment hint and post-increment their
@ pointer.
@ The body runs once before the counter is tested, so at least eight
@ elements are always processed.
function ff_vector_clip_int32_neon, export=1
@ Broadcast the bounds: q0 = value from r2, q1 = value from r3.
vdup.32 q0, r2
vdup.32 q1, r3
@ r2 is reused as the element counter, fetched from the stack.
ldr r2, [sp]
1:
vld1.32 {q2-q3}, [r1,:128]!
@ Apply the upper bound, then the lower bound.
vmin.s32 q2, q2, q1
vmin.s32 q3, q3, q1
vmax.s32 q2, q2, q0
vmax.s32 q3, q3, q0
vst1.32 {q2-q3}, [r0,:128]!
@ Eight elements done; loop while the remaining count is positive.
subs r2, r2, #8
bgt 1b
bx lr
endfunc
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "audiodsp.h"
/**
 * Clip one float that is handled as its raw 32-bit pattern.
 *
 * Only meaningful when the lower bound is negative and the upper bound is
 * positive: with the sign bit set, a larger unsigned pattern means a more
 * negative float, so "a > mini" detects values below the lower bound; after
 * flipping the sign bit the same unsigned comparison detects values above
 * the upper bound.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] using integer comparisons only.
 *
 * Elements are processed in groups of 8, so len is effectively rounded up to
 * the next multiple of 8; dst and src may be the same buffer.
 *
 * The float <-> integer reinterpretation goes through a union instead of
 * casting float pointers to uint32_t pointers, which would access float
 * objects through an incompatible lvalue type.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound, expected to be negative
 * @param max pointer to the upper bound, expected to be positive
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    union { float f; uint32_t i; } lo, hi, v;
    uint32_t mini, maxi, maxisign;
    int i, j;

    lo.f     = *min;
    hi.f     = *max;
    mini     = lo.i;
    maxi     = hi.i;
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            v.f        = src[i + j];
            v.i        = clipf_c_one(v.i, mini, maxi, maxisign);
            dst[i + j] = v.f;
        }
    }
}
/**
 * Clip every element of a float vector to the range [min, max].
 *
 * When the bounds straddle zero the work is handed to the integer-compare
 * variant; otherwise each element goes through av_clipf(). Elements are
 * handled in groups of 8, so len is effectively rounded up to a multiple
 * of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
/**
 * Compute the scalar (dot) product of two int16_t vectors.
 *
 * The sum is accumulated in an unsigned integer so that a sum exceeding the
 * int range wraps modulo 2^32 instead of triggering signed-overflow
 * undefined behavior. A non-positive order yields 0.
 *
 * @param v1    first input vector
 * @param v2    second input vector
 * @param order number of elements to multiply and accumulate
 * @return the accumulated product, converted back to int32_t
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return (int32_t) res;
}
/**
 * Clip every element of an int32_t vector to the range [min, max].
 *
 * Works on groups of 8 elements and always handles at least one group; the
 * unsigned counter is decremented by 8 per group, so len has to be a
 * positive multiple of 8 for the loop to stop at the end of the array.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
/**
 * Fill an audio DSP context with the portable C routines, then give the
 * architecture-specific initializer for the build target a chance to
 * override them.
 *
 * @param c audio DSP context to initialize
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
    /* C reference implementations */
    c->vector_clipf        = vector_clipf_c;
    c->vector_clip_int32   = vector_clip_int32_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    /* architecture-specific overrides */
    if (ARCH_ARM) {
        ff_audiodsp_init_arm(c);
    }
    if (ARCH_PPC) {
        ff_audiodsp_init_ppc(c);
    }
    if (ARCH_X86) {
        ff_audiodsp_init_x86(c);
    }
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AUDIODSP_H
#define AVCODEC_AUDIODSP_H
#include <stdint.h>
/**
 * Table of audio DSP routines.
 *
 * ff_audiodsp_init() fills every member with a C implementation and then
 * lets the architecture-specific initializers replace them.
 */
typedef struct AudioDSPContext {
/**
 * Calculate scalar product of two vectors.
 * @param v1  first input vector
 * @param v2  second input vector, 16-byte aligned
 * @param len length of vectors, should be multiple of 16
 * @return the scalar product of v1 and v2
 */
int32_t (*scalarproduct_int16)(const int16_t *v1,
const int16_t *v2 /* align 16 */, int len);
/**
 * Clip each element in an array of int32_t to a given minimum and
 * maximum value.
 * @param dst destination array
 * constraints: 16-byte aligned
 * @param src source array
 * constraints: 16-byte aligned
 * @param min minimum value
 * constraints: must be in the range [-(1 << 24), 1 << 24]
 * @param max maximum value
 * constraints: must be in the range [-(1 << 24), 1 << 24]
 * @param len number of elements in the array
 * constraints: multiple of 32 greater than zero
 */
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
/**
 * Clip each element in an array of float to a given minimum and
 * maximum value.
 * Assume len is a multiple of 8, and arrays are 16-byte aligned.
 * NOTE(review): the annotation on the len parameter below says 16 while
 * this comment says 8 - confirm which figure the optimized
 * implementations actually require.
 * @param dst destination array
 * @param src source array
 * @param min minimum value
 * @param max maximum value
 * @param len number of elements in the array
 */
void (*vector_clipf)(float *dst /* align 16 */,
const float *src /* align 16 */,
float min, float max, int len /* align 16 */);
} AudioDSPContext;
void ff_audiodsp_init(AudioDSPContext *c);
void ff_audiodsp_init_arm(AudioDSPContext *c);
void ff_audiodsp_init_ppc(AudioDSPContext *c);
void ff_audiodsp_init_x86(AudioDSPContext *c);
#endif /* AVCODEC_AUDIODSP_H */
...@@ -44,9 +44,10 @@ ...@@ -44,9 +44,10 @@
#include "libavutil/channel_layout.h" #include "libavutil/channel_layout.h"
#include "libavutil/lfg.h" #include "libavutil/lfg.h"
#include "audiodsp.h"
#include "avcodec.h" #include "avcodec.h"
#include "get_bits.h" #include "get_bits.h"
#include "dsputil.h"
#include "bytestream.h" #include "bytestream.h"
#include "fft.h" #include "fft.h"
#include "internal.h" #include "internal.h"
...@@ -123,7 +124,7 @@ typedef struct cook { ...@@ -123,7 +124,7 @@ typedef struct cook {
void (*saturate_output)(struct cook *q, float *out); void (*saturate_output)(struct cook *q, float *out);
AVCodecContext* avctx; AVCodecContext* avctx;
DSPContext dsp; AudioDSPContext adsp;
GetBitContext gb; GetBitContext gb;
/* stream data */ /* stream data */
int num_vectors; int num_vectors;
...@@ -873,8 +874,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, ...@@ -873,8 +874,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
*/ */
static void saturate_output_float(COOKContext *q, float *out) static void saturate_output_float(COOKContext *q, float *out)
{ {
q->dsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
} }
...@@ -1072,7 +1073,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx) ...@@ -1072,7 +1073,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
/* Initialize RNG. */ /* Initialize RNG. */
av_lfg_init(&q->random_state, 0); av_lfg_init(&q->random_state, 0);
ff_dsputil_init(&q->dsp, avctx); ff_audiodsp_init(&q->adsp);
while (edata_ptr < edata_ptr_end) { while (edata_ptr < edata_ptr_end) {
/* 8 for mono, 16 for stereo, ? for multichannel /* 8 for mono, 16 for stereo, ? for multichannel
......
...@@ -1345,87 +1345,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) ...@@ -1345,87 +1345,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c)
/**
 * Clip one float that is handled as its raw 32-bit pattern.
 *
 * Only meaningful when the lower bound is negative and the upper bound is
 * positive: with the sign bit set, a larger unsigned pattern means a more
 * negative float, so "a > mini" detects values below the lower bound; after
 * flipping the sign bit the same unsigned comparison detects values above
 * the upper bound.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] using integer comparisons only.
 *
 * Elements are processed in groups of 8, so len is effectively rounded up to
 * the next multiple of 8; dst and src may be the same buffer.
 *
 * The float <-> integer reinterpretation goes through a union instead of
 * casting float pointers to uint32_t pointers, which would access float
 * objects through an incompatible lvalue type.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound, expected to be negative
 * @param max pointer to the upper bound, expected to be positive
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    union { float f; uint32_t i; } lo, hi, v;
    uint32_t mini, maxi, maxisign;
    int i, j;

    lo.f     = *min;
    hi.f     = *max;
    mini     = lo.i;
    maxi     = hi.i;
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            v.f        = src[i + j];
            v.i        = clipf_c_one(v.i, mini, maxi, maxisign);
            dst[i + j] = v.f;
        }
    }
}
/**
 * Clip every element of a float vector to the range [min, max].
 *
 * When the bounds straddle zero the work is handed to the integer-compare
 * variant; otherwise each element goes through av_clipf(). Elements are
 * handled in groups of 8, so len is effectively rounded up to a multiple
 * of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
/**
 * Compute the scalar (dot) product of two int16_t vectors.
 *
 * The sum is accumulated in an unsigned integer so that a sum exceeding the
 * int range wraps modulo 2^32 instead of triggering signed-overflow
 * undefined behavior. A non-positive order yields 0.
 *
 * @param v1    first input vector
 * @param v2    second input vector
 * @param order number of elements to multiply and accumulate
 * @return the accumulated product, converted back to int32_t
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return (int32_t) res;
}
/**
 * Clip every element of an int32_t vector to the range [min, max].
 *
 * Works on groups of 8 elements and always handles at least one group; the
 * unsigned counter is decremented by 8 per group, so len has to be a
 * positive multiple of 8 for the loop to stop at the end of the array.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{ {
ff_j_rev_dct(block); ff_j_rev_dct(block);
...@@ -1661,10 +1580,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ...@@ -1661,10 +1580,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->try_8x8basis = try_8x8basis_c; c->try_8x8basis = try_8x8basis_c;
c->add_8x8basis = add_8x8basis_c; c->add_8x8basis = add_8x8basis_c;
c->scalarproduct_int16 = scalarproduct_int16_c;
c->vector_clip_int32 = vector_clip_int32_c;
c->vector_clipf = vector_clipf_c;
c->shrink[0] = av_image_copy_plane; c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22; c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44; c->shrink[2] = ff_shrink44;
......
...@@ -140,11 +140,6 @@ typedef struct DSPContext { ...@@ -140,11 +140,6 @@ typedef struct DSPContext {
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */,
const float *src /* align 16 */,
float min, float max, int len /* align 16 */);
/* (I)DCT */ /* (I)DCT */
void (*fdct)(int16_t *block /* align 16 */); void (*fdct)(int16_t *block /* align 16 */);
void (*fdct248)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */);
...@@ -204,30 +199,6 @@ typedef struct DSPContext { ...@@ -204,30 +199,6 @@ typedef struct DSPContext {
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
int src_wrap, int width, int height); int src_wrap, int width, int height);
/**
* Calculate scalar product of two vectors.
* @param len length of vectors, should be multiple of 16
*/
int32_t (*scalarproduct_int16)(const int16_t *v1,
const int16_t *v2 /* align 16 */, int len);
/**
* Clip each element in an array of int32_t to a given minimum and
* maximum value.
* @param dst destination array
* constraints: 16-byte aligned
* @param src source array
* constraints: 16-byte aligned
* @param min minimum value
* constraints: must be in the range [-(1 << 24), 1 << 24]
* @param max maximum value
* constraints: must be in the range [-(1 << 24), 1 << 24]
* @param len number of elements in the array
* constraints: multiple of 32 greater than zero
*/
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
} DSPContext; } DSPContext;
void ff_dsputil_static_init(void); void ff_dsputil_static_init(void);
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include "avcodec.h" #include "avcodec.h"
#include "libavutil/avutil.h" #include "libavutil/avutil.h"
#include "get_bits.h" #include "get_bits.h"
#include "dsputil.h" #include "audiodsp.h"
#include "internal.h" #include "internal.h"
...@@ -100,7 +100,7 @@ typedef struct { ...@@ -100,7 +100,7 @@ typedef struct {
} G729FormatDescription; } G729FormatDescription;
typedef struct { typedef struct {
DSPContext dsp; AudioDSPContext adsp;
/// past excitation signal buffer /// past excitation signal buffer
int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN]; int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN];
...@@ -381,8 +381,8 @@ static av_cold int decoder_init(AVCodecContext * avctx) ...@@ -381,8 +381,8 @@ static av_cold int decoder_init(AVCodecContext * avctx)
for(i=0; i<4; i++) for(i=0; i<4; i++)
ctx->quant_energy[i] = -14336; // -14 in (5.10) ctx->quant_energy[i] = -14336; // -14 in (5.10)
ff_dsputil_init(&ctx->dsp, avctx); ff_audiodsp_init(&ctx->adsp);
ctx->dsp.scalarproduct_int16 = scalarproduct_int16_c; ctx->adsp.scalarproduct_int16 = scalarproduct_int16_c;
return 0; return 0;
} }
...@@ -578,7 +578,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, ...@@ -578,7 +578,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
} }
/* Decode the fixed-codebook gain. */ /* Decode the fixed-codebook gain. */
ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->dsp, gain_corr_factor, ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->adsp, gain_corr_factor,
fc, MR_ENERGY, fc, MR_ENERGY,
ctx->quant_energy, ctx->quant_energy,
ma_prediction_coeff, ma_prediction_coeff,
...@@ -668,7 +668,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr, ...@@ -668,7 +668,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
/* Call postfilter and also update voicing decision for use in next frame. */ /* Call postfilter and also update voicing decision for use in next frame. */
ff_g729_postfilter( ff_g729_postfilter(
&ctx->dsp, &ctx->adsp,
&ctx->ht_prev_data, &ctx->ht_prev_data,
&is_periodic, &is_periodic,
&lp[i][0], &lp[i][0],
......
...@@ -107,7 +107,7 @@ static void residual_filter(int16_t* out, const int16_t* filter_coeffs, const in ...@@ -107,7 +107,7 @@ static void residual_filter(int16_t* out, const int16_t* filter_coeffs, const in
* *
* \return 0 if long-term prediction gain is less than 3dB, 1 - otherwise * \return 0 if long-term prediction gain is less than 3dB, 1 - otherwise
*/ */
static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
const int16_t* residual, int16_t *residual_filt, const int16_t* residual, int16_t *residual_filt,
int subframe_size) int subframe_size)
{ {
...@@ -161,7 +161,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -161,7 +161,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
/* Start of best delay searching code */ /* Start of best delay searching code */
gain_num = 0; gain_num = 0;
ener = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, ener = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
sig_scaled + RES_PREV_DATA_SIZE, sig_scaled + RES_PREV_DATA_SIZE,
subframe_size); subframe_size);
if (ener) { if (ener) {
...@@ -190,7 +190,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -190,7 +190,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
corr_int_num = 0; corr_int_num = 0;
best_delay_int = pitch_delay_int - 1; best_delay_int = pitch_delay_int - 1;
for (i = pitch_delay_int - 1; i <= pitch_delay_int + 1; i++) { for (i = pitch_delay_int - 1; i <= pitch_delay_int + 1; i++) {
sum = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE, sum = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
sig_scaled + RES_PREV_DATA_SIZE - i, sig_scaled + RES_PREV_DATA_SIZE - i,
subframe_size); subframe_size);
if (sum > corr_int_num) { if (sum > corr_int_num) {
...@@ -200,7 +200,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -200,7 +200,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
} }
if (corr_int_num) { if (corr_int_num) {
/* Compute denominator of pseudo-normalized correlation R'(0). */ /* Compute denominator of pseudo-normalized correlation R'(0). */
corr_int_den = dsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE, corr_int_den = adsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
sig_scaled - best_delay_int + RES_PREV_DATA_SIZE, sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
subframe_size); subframe_size);
...@@ -227,7 +227,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -227,7 +227,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
Also compute maximum value of above denominators over all k. */ Also compute maximum value of above denominators over all k. */
tmp = corr_int_den; tmp = corr_int_den;
for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) { for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) {
sum = dsp->scalarproduct_int16(&delayed_signal[k][1], sum = adsp->scalarproduct_int16(&delayed_signal[k][1],
&delayed_signal[k][1], &delayed_signal[k][1],
subframe_size - 1); subframe_size - 1);
corr_den[k][0] = sum + delayed_signal[k][0 ] * delayed_signal[k][0 ]; corr_den[k][0] = sum + delayed_signal[k][0 ] * delayed_signal[k][0 ];
...@@ -255,7 +255,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -255,7 +255,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
int gain_num_short_square; int gain_num_short_square;
/* Compute numerator of pseudo-normalized /* Compute numerator of pseudo-normalized
correlation R'(k). */ correlation R'(k). */
sum = dsp->scalarproduct_int16(&delayed_signal[k][i], sum = adsp->scalarproduct_int16(&delayed_signal[k][i],
sig_scaled + RES_PREV_DATA_SIZE, sig_scaled + RES_PREV_DATA_SIZE,
subframe_size); subframe_size);
gain_num_short = FFMAX(sum >> sh_gain_num, 0); gain_num_short = FFMAX(sum >> sh_gain_num, 0);
...@@ -312,7 +312,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -312,7 +312,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
LONG_INT_FILT_LEN, LONG_INT_FILT_LEN,
subframe_size + 1); subframe_size + 1);
/* Compute R'(k) correlation's numerator. */ /* Compute R'(k) correlation's numerator. */
sum = dsp->scalarproduct_int16(residual_filt, sum = adsp->scalarproduct_int16(residual_filt,
sig_scaled + RES_PREV_DATA_SIZE, sig_scaled + RES_PREV_DATA_SIZE,
subframe_size); subframe_size);
...@@ -327,7 +327,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -327,7 +327,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
} }
/* Compute R'(k) correlation's denominator. */ /* Compute R'(k) correlation's denominator. */
sum = dsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size); sum = adsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size);
tmp = FFMAX(av_log2(sum) - 14, 0); tmp = FFMAX(av_log2(sum) - 14, 0);
sum >>= tmp; sum >>= tmp;
...@@ -421,7 +421,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int, ...@@ -421,7 +421,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
* *
* \note All members of lp_gn, except 10-19 must be equal to zero. * \note All members of lp_gn, except 10-19 must be equal to zero.
*/ */
static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn, static int16_t get_tilt_comp(AudioDSPContext *adsp, int16_t *lp_gn,
const int16_t *lp_gd, int16_t* speech, const int16_t *lp_gd, int16_t* speech,
int subframe_size) int subframe_size)
{ {
...@@ -437,8 +437,8 @@ static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn, ...@@ -437,8 +437,8 @@ static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn,
/* Now lp_gn (starting with 10) contains impulse response /* Now lp_gn (starting with 10) contains impulse response
of A(z/FORMANT_PP_FACTOR_NUM)/A(z/FORMANT_PP_FACTOR_DEN) filter. */ of A(z/FORMANT_PP_FACTOR_NUM)/A(z/FORMANT_PP_FACTOR_DEN) filter. */
rh0 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20); rh0 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20);
rh1 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20); rh1 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20);
/* downscale to avoid overflow */ /* downscale to avoid overflow */
temp = av_log2(rh0) - 14; temp = av_log2(rh0) - 14;
...@@ -511,7 +511,7 @@ static int16_t apply_tilt_comp(int16_t* out, int16_t* res_pst, int refl_coeff, ...@@ -511,7 +511,7 @@ static int16_t apply_tilt_comp(int16_t* out, int16_t* res_pst, int refl_coeff,
return tmp; return tmp;
} }
void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
const int16_t *lp_filter_coeffs, int pitch_delay_int, const int16_t *lp_filter_coeffs, int pitch_delay_int,
int16_t* residual, int16_t* res_filter_data, int16_t* residual, int16_t* res_filter_data,
int16_t* pos_filter_data, int16_t *speech, int subframe_size) int16_t* pos_filter_data, int16_t *speech, int subframe_size)
...@@ -541,7 +541,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, ...@@ -541,7 +541,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,
/* long-term filter. If long-term prediction gain is larger than 3dB (returned value is /* long-term filter. If long-term prediction gain is larger than 3dB (returned value is
nonzero) then declare current subframe as periodic. */ nonzero) then declare current subframe as periodic. */
*voicing = FFMAX(*voicing, long_term_filter(dsp, pitch_delay_int, *voicing = FFMAX(*voicing, long_term_filter(adsp, pitch_delay_int,
residual, residual_filt_buf + 10, residual, residual_filt_buf + 10,
subframe_size)); subframe_size));
...@@ -549,7 +549,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, ...@@ -549,7 +549,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,
memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t)); memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t));
/* short-term filter tilt compensation */ /* short-term filter tilt compensation */
tilt_comp_coeff = get_tilt_comp(dsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size); tilt_comp_coeff = get_tilt_comp(adsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size);
/* Apply second half of short-term postfilter: 1/A(z/FORMANT_PP_FACTOR_DEN) */ /* Apply second half of short-term postfilter: 1/A(z/FORMANT_PP_FACTOR_DEN) */
ff_celp_lp_synthesis_filter(pos_filter_data + 10, lp_gd + 1, ff_celp_lp_synthesis_filter(pos_filter_data + 10, lp_gd + 1,
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#define FFMPEG_G729POSTFILTER_H #define FFMPEG_G729POSTFILTER_H
#include <stdint.h> #include <stdint.h>
#include "dsputil.h" #include "audiodsp.h"
/** /**
* tilt compensation factor (G.729, k1>0) * tilt compensation factor (G.729, k1>0)
...@@ -94,7 +94,7 @@ ...@@ -94,7 +94,7 @@
* Short-term postfilter (4.2.2). * Short-term postfilter (4.2.2).
* Tilt-compensation (4.2.3) * Tilt-compensation (4.2.3)
*/ */
void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing, void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
const int16_t *lp_filter_coeffs, int pitch_delay_int, const int16_t *lp_filter_coeffs, int pitch_delay_int,
int16_t* residual, int16_t* res_filter_data, int16_t* residual, int16_t* res_filter_data,
int16_t* pos_filter_data, int16_t *speech, int16_t* pos_filter_data, int16_t *speech,
......
OBJS += ppc/fmtconvert_altivec.o \ OBJS += ppc/fmtconvert_altivec.o \
OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o
OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o
OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o
OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o
...@@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ ...@@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \
ppc/fdct_altivec.o \ ppc/fdct_altivec.o \
ppc/gmc_altivec.o \ ppc/gmc_altivec.o \
ppc/idct_altivec.o \ ppc/idct_altivec.o \
ppc/int_altivec.o \
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o
FFT-OBJS-$(HAVE_VSX) += ppc/fft_vsx.o FFT-OBJS-$(HAVE_VSX) += ppc/fft_vsx.o
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
/** /**
* @file * @file
* miscellaneous integer operations * miscellaneous audio operations
*/ */
#include "config.h" #include "config.h"
...@@ -29,10 +29,13 @@ ...@@ -29,10 +29,13 @@
#endif #endif
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h" #include "libavutil/ppc/util_altivec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/audiodsp.h"
#include "dsputil_altivec.h"
#if HAVE_ALTIVEC
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
int order) int order)
...@@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, ...@@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
return ires; return ires;
} }
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) #endif /* HAVE_ALTIVEC */
av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c)
{ {
#if HAVE_ALTIVEC
if (!PPC_ALTIVEC(av_get_cpu_flags()))
return;
c->scalarproduct_int16 = scalarproduct_int16_altivec; c->scalarproduct_int16 = scalarproduct_int16_altivec;
#endif /* HAVE_ALTIVEC */
} }
...@@ -36,6 +36,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); ...@@ -36,6 +36,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth); unsigned high_bit_depth);
void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx);
#endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
...@@ -35,7 +35,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, ...@@ -35,7 +35,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (PPC_ALTIVEC(mm_flags)) { if (PPC_ALTIVEC(mm_flags)) {
ff_dsputil_init_altivec(c, avctx, high_bit_depth); ff_dsputil_init_altivec(c, avctx, high_bit_depth);
ff_int_init_altivec(c, avctx);
c->gmc1 = ff_gmc1_altivec; c->gmc1 = ff_gmc1_altivec;
if (!high_bit_depth) { if (!high_bit_depth) {
......
...@@ -1681,9 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy) ...@@ -1681,9 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
} }
/** inverse root mean square */ /** inverse root mean square */
int ff_irms(DSPContext *dsp, const int16_t *data) int ff_irms(AudioDSPContext *adsp, const int16_t *data)
{ {
unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE); unsigned int sum = adsp->scalarproduct_int16(data, data, BLOCKSIZE);
if (sum == 0) if (sum == 0)
return 0; /* OOPS - division by zero */ return 0; /* OOPS - division by zero */
...@@ -1701,7 +1701,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, ...@@ -1701,7 +1701,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
if (cba_idx) { if (cba_idx) {
cba_idx += BLOCKSIZE/2 - 1; cba_idx += BLOCKSIZE/2 - 1;
ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx); ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12; m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * gval) >> 12;
} else { } else {
m[0] = 0; m[0] = 0;
} }
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include <stdint.h> #include <stdint.h>
#include "lpc.h" #include "lpc.h"
#include "audio_frame_queue.h" #include "audio_frame_queue.h"
#include "dsputil.h" #include "audiodsp.h"
#define NBLOCKS 4 ///< number of subblocks within a block #define NBLOCKS 4 ///< number of subblocks within a block
#define BLOCKSIZE 40 ///< subblock size in 16-bit words #define BLOCKSIZE 40 ///< subblock size in 16-bit words
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
typedef struct RA144Context { typedef struct RA144Context {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp; AudioDSPContext adsp;
LPCContext lpc_ctx; LPCContext lpc_ctx;
AudioFrameQueue afq; AudioFrameQueue afq;
int last_frame; int last_frame;
...@@ -72,7 +72,7 @@ unsigned int ff_rms(const int *data); ...@@ -72,7 +72,7 @@ unsigned int ff_rms(const int *data);
int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
int energy); int energy);
unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy); unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/); int ff_irms(AudioDSPContext *adsp, const int16_t *data/*align 16*/);
void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
int cba_idx, int cb1_idx, int cb2_idx, int cba_idx, int cb1_idx, int cb2_idx,
int gval, int gain); int gval, int gain);
......
...@@ -34,7 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx) ...@@ -34,7 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
RA144Context *ractx = avctx->priv_data; RA144Context *ractx = avctx->priv_data;
ractx->avctx = avctx; ractx->avctx = avctx;
ff_dsputil_init(&ractx->dsp, avctx); ff_audiodsp_init(&ractx->adsp);
ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[0] = ractx->lpc_tables[0];
ractx->lpc_coef[1] = ractx->lpc_tables[1]; ractx->lpc_coef[1] = ractx->lpc_tables[1];
......
...@@ -61,7 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) ...@@ -61,7 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[0] = ractx->lpc_tables[0];
ractx->lpc_coef[1] = ractx->lpc_tables[1]; ractx->lpc_coef[1] = ractx->lpc_tables[1];
ractx->avctx = avctx; ractx->avctx = avctx;
ff_dsputil_init(&ractx->dsp, avctx); ff_audiodsp_init(&ractx->adsp);
ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
FF_LPC_TYPE_LEVINSON); FF_LPC_TYPE_LEVINSON);
if (ret < 0) if (ret < 0)
...@@ -374,7 +374,7 @@ static void ra144_encode_subblock(RA144Context *ractx, ...@@ -374,7 +374,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
memcpy(cba, work + LPC_ORDER, sizeof(cba)); memcpy(cba, work + LPC_ORDER, sizeof(cba));
ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12; m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * rms) >> 12;
} }
fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx); fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
for (i = 0; i < BLOCKSIZE; i++) { for (i = 0; i < BLOCKSIZE; i++) {
......
...@@ -28,9 +28,9 @@ ...@@ -28,9 +28,9 @@
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/samplefmt.h" #include "libavutil/samplefmt.h"
#include "tak.h" #include "tak.h"
#include "audiodsp.h"
#include "thread.h" #include "thread.h"
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h"
#include "internal.h" #include "internal.h"
#include "unary.h" #include "unary.h"
...@@ -46,7 +46,7 @@ typedef struct MCDParam { ...@@ -46,7 +46,7 @@ typedef struct MCDParam {
typedef struct TAKDecContext { typedef struct TAKDecContext {
AVCodecContext *avctx; ///< parent AVCodecContext AVCodecContext *avctx; ///< parent AVCodecContext
DSPContext dsp; AudioDSPContext adsp;
TAKStreamInfo ti; TAKStreamInfo ti;
GetBitContext gb; ///< bitstream reader initialized to start at the current frame GetBitContext gb; ///< bitstream reader initialized to start at the current frame
...@@ -171,7 +171,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) ...@@ -171,7 +171,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
{ {
TAKDecContext *s = avctx->priv_data; TAKDecContext *s = avctx->priv_data;
ff_dsputil_init(&s->dsp, avctx); ff_audiodsp_init(&s->adsp);
s->avctx = avctx; s->avctx = avctx;
avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
...@@ -469,8 +469,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded, ...@@ -469,8 +469,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded,
int v = 1 << (filter_quant - 1); int v = 1 << (filter_quant - 1);
if (filter_order & -16) if (filter_order & -16)
v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter, v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
filter_order & -16); filter_order & -16);
for (j = filter_order & -16; j < filter_order; j += 4) { for (j = filter_order & -16; j < filter_order; j += 4) {
v += s->residues[i + j + 3] * s->filter[j + 3] + v += s->residues[i + j + 3] * s->filter[j + 3] +
s->residues[i + j + 2] * s->filter[j + 2] + s->residues[i + j + 2] * s->filter[j + 2] +
...@@ -640,8 +640,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) ...@@ -640,8 +640,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
int v = 1 << 9; int v = 1 << 9;
if (filter_order == 16) { if (filter_order == 16) {
v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter, v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
filter_order); filter_order);
} else { } else {
v += s->residues[i + 7] * s->filter[7] + v += s->residues[i + 7] * s->filter[7] +
s->residues[i + 6] * s->filter[6] + s->residues[i + 6] * s->filter[6] +
......
...@@ -2,6 +2,7 @@ OBJS += x86/constants.o \ ...@@ -2,6 +2,7 @@ OBJS += x86/constants.o \
x86/fmtconvert_init.o \ x86/fmtconvert_init.o \
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o
OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o
OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DCT) += x86/dct_init.o
OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o
...@@ -69,6 +70,7 @@ YASM-OBJS += x86/deinterlace.o \ ...@@ -69,6 +70,7 @@ YASM-OBJS += x86/deinterlace.o \
x86/fmtconvert.o \ x86/fmtconvert.o \
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o
YASM-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp.o YASM-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\ YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
......
;******************************************************************************
;* optimized audio functions
;* Copyright (c) 2008 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_TEXT
;-----------------------------------------------------------------------------
; int32_t ff_scalarproduct_int16(const int16_t *v1, const int16_t *v2,
;                                int order)
;
; Dot product of two int16_t vectors holding 'order' elements each; the
; partial sums are accumulated in 32-bit lanes.
; Every loop iteration consumes 2*mmsize bytes of each vector and the loop
; condition is only tested after an iteration has run, so 'order' has to be a
; non-zero multiple of mmsize elements, otherwise the loop reads past the end
; of the vectors.
; v1 is read with movu; v2 is used as a direct memory operand of pmaddwd.
;-----------------------------------------------------------------------------
%macro SCALARPRODUCT 0
; C prototype: int32_t f(const int16_t *v1, const int16_t *v2, int order)
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
; Turn the element count into a byte count (2 bytes per int16_t).
shl orderq, 1
; Point both vectors at their end and walk a negative byte offset up to zero.
add v1q, orderq
add v2q, orderq
neg orderq
; m2 is the running sum; start from zero.
pxor m2, m2
.loop:
movu m0, [v1q + orderq]
movu m1, [v1q + orderq + mmsize]
; Multiply 16-bit pairs and add adjacent products into 32-bit lanes.
pmaddwd m0, [v2q + orderq]
pmaddwd m1, [v2q + orderq + mmsize]
paddd m2, m0
paddd m2, m1
add orderq, mmsize*2
; Keep going while the offset is still negative.
jl .loop
; HADDD comes from x86util.asm; presumably it sums the dword lanes of m2
; using m0 as scratch -- confirm against the macro definition.
HADDD m2, m0
; The low dword of m2 is the return value.
movd eax, m2
%if mmsize == 8
; MMX build only: leave MMX state before returning to C code.
emms
%endif
RET
%endmacro
; Instantiate once with MMX registers (mmxext) and once with XMM registers
; (sse2).
INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
; len is counted in int32_t elements (4 bytes each) and is only tested after
; a full loop iteration has run, so it has to be a non-zero multiple of the
; number of elements handled per iteration.
; NOTE(review): each %rep pass handles 4*(%3+1) registers, i.e. %2*4*(%3+1)
; registers per loop iteration, while the pointer/length step at the end of
; the loop is derived from (%2+%3). The two agree only when %3 == 0 or
; %2 == 1, which holds for every instantiation in this file.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
; CLIPD works on floats in this variant: convert the integer bounds first.
cvtsi2ss m4, minm
cvtsi2ss m5, maxm
%else
; Integer variant: load the bounds unchanged.
movd m4, minm
movd m5, maxm
%endif
; SPLATD comes from x86util.asm; presumably it broadcasts the low dword to
; every lane so m4/m5 hold min/max for a whole register -- confirm.
SPLATD m4
SPLATD m5
.loop:
; Register index offset of the current unrolled pass.
%assign %%i 0
%rep %2
; Load four registers worth of source data.
mova m0, [srcq+mmsize*(0+%%i)]
mova m1, [srcq+mmsize*(1+%%i)]
mova m2, [srcq+mmsize*(2+%%i)]
mova m3, [srcq+mmsize*(3+%%i)]
%if %3
; Wide variant: four more registers. m8-m10 are needed here, which is why
; this variant is only instantiated where m8 is defined.
mova m7, [srcq+mmsize*(4+%%i)]
mova m8, [srcq+mmsize*(5+%%i)]
mova m9, [srcq+mmsize*(6+%%i)]
mova m10, [srcq+mmsize*(7+%%i)]
%endif
; Clip against m4 (min) and m5 (max); m6 is handed to CLIPD as its fourth
; operand, presumably as a scratch register.
CLIPD m0, m4, m5, m6
CLIPD m1, m4, m5, m6
CLIPD m2, m4, m5, m6
CLIPD m3, m4, m5, m6
%if %3
CLIPD m7, m4, m5, m6
CLIPD m8, m4, m5, m6
CLIPD m9, m4, m5, m6
CLIPD m10, m4, m5, m6
%endif
; Store the clipped values at the matching destination offsets.
mova [dstq+mmsize*(0+%%i)], m0
mova [dstq+mmsize*(1+%%i)], m1
mova [dstq+mmsize*(2+%%i)], m2
mova [dstq+mmsize*(3+%%i)], m3
%if %3
mova [dstq+mmsize*(4+%%i)], m7
mova [dstq+mmsize*(5+%%i)], m8
mova [dstq+mmsize*(6+%%i)], m9
mova [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(%3+1)
%endrep
; Advance both pointers by the bytes consumed in this iteration.
add srcq, mmsize*4*(%2+%3)
add dstq, mmsize*4*(%2+%3)
; len is in 4-byte elements, hence a quarter of the byte step.
sub lend, mmsize*(%2+%3)
; Loop while elements remain.
jg .loop
REP_RET
%endmacro
; MMX version: integer bounds, one pass of 4*mmsize bytes per loop iteration.
INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
; SSE2 version with the "_int" suffix: CLIPD is still CLIPD_MMX at this
; point, so the bounds stay integers (4th argument = 0).
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
; Plain SSE2 version: CLIPD_SSE2 takes the bounds as floats (4th argument = 1)
; and the loop body is unrolled twice.
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
; SSE4 version: uses CLIPD_SSE41. Where m8 is defined the wide variant
; (8*mmsize bytes per pass, 11 xmm registers) is built, otherwise the
; 4*mmsize variant with 6 xmm registers.
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_AUDIODSP_H
#define AVCODEC_X86_AUDIODSP_H
/**
 * SSE implementation of the vector_clipf operation.
 *
 * ff_audiodsp_init_x86() installs it as AudioDSPContext.vector_clipf when
 * the CPU reports SSE support.
 * NOTE(review): the definition is not visible from this header; presumably it
 * shares the generic vector_clipf constraints (len a multiple of 8, 16-byte
 * aligned arrays) -- confirm against the implementation.
 */
void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len);
#endif /* AVCODEC_X86_AUDIODSP_H */
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/audiodsp.h"
#include "audiodsp.h"
/*
 * Prototypes of the SIMD routines selected by ff_audiodsp_init_x86().
 * NOTE(review): presumably these are the symbols emitted by the cglobal
 * declarations in x86/audiodsp.asm (one per instruction-set instantiation)
 * -- confirm against the assembly file.
 */
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
int order);
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
/**
 * Install the x86 SIMD implementations supported by the running CPU.
 *
 * The feature checks run in a fixed order and each one overwrites the
 * pointer set by an earlier check, so the last matching check decides which
 * routine is used. Pointers for which no check matches are left untouched.
 *
 * @param c context whose function pointers are updated in place
 */
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags)) {
        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
    }

    if (EXTERNAL_MMXEXT(flags)) {
        c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
    }

    if (EXTERNAL_SSE(flags)) {
        c->vector_clipf = ff_vector_clipf_sse;
    }

    if (EXTERNAL_SSE2(flags)) {
        c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
        /* CPUs flagged as Atom get the "_int" SSE2 clip routine. */
        c->vector_clip_int32 = (flags & AV_CPU_FLAG_ATOM)
                               ? ff_vector_clip_int32_int_sse2
                               : ff_vector_clip_int32_sse2;
    }

    if (EXTERNAL_SSE4(flags)) {
        c->vector_clip_int32 = ff_vector_clip_int32_sse4;
    }
}
...@@ -30,115 +30,6 @@ cextern pb_80 ...@@ -30,115 +30,6 @@ cextern pb_80
SECTION_TEXT SECTION_TEXT
; SCALARPRODUCT: emits one scalarproduct_int16 function for the instruction
; set selected by the preceding INIT_* line (instantiated twice below).
; The function multiplies the 16-bit elements of v1 and v2 pairwise,
; accumulates the products in 32-bit lanes and returns the total in eax.
; NOTE(review): every loop iteration consumes 2*mmsize bytes (mmsize int16
; elements) of each input and the loop body always runs at least once, so
; order is presumably expected to be a positive multiple of mmsize -- confirm
; with the callers.
%macro SCALARPRODUCT 0
; int32_t ff_scalarproduct_int16(const int16_t *v1, const int16_t *v2, int order)
; (types as in the C prototypes of the generated functions)
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
; order counts int16 elements: double it to obtain a byte length
shl orderq, 1
; move both pointers to the end of their buffers and index them with a
; negative byte offset that counts up towards zero
add v1q, orderq
add v2q, orderq
neg orderq
; m2 = running sum, cleared before the loop
pxor m2, m2
.loop:
; v1 is fetched with unaligned loads (movu); v2 is used directly as the
; memory operand of pmaddwd.
; NOTE(review): in the XMM build that presumably requires v2 to be
; mmsize-aligned -- confirm.
movu m0, [v1q + orderq]
movu m1, [v1q + orderq + mmsize]
; pmaddwd: multiply signed words and add adjacent products into dwords
pmaddwd m0, [v2q + orderq]
pmaddwd m1, [v2q + orderq + mmsize]
paddd m2, m0
paddd m2, m1
; advance by the two registers just handled; keep looping while the offset
; is still negative
add orderq, mmsize*2
jl .loop
; HADDD is defined elsewhere; presumably it sums the dwords of m2 into its
; low dword using m0 as scratch -- TODO confirm
HADDD m2, m0
movd eax, m2
%if mmsize == 8
; MMX build only: emms is issued before returning to the caller
emms
%endif
RET
%endmacro
; MMXEXT build (mmsize == 8, so the emms path above is assembled)
INIT_MMX mmxext
SCALARPRODUCT
; SSE2 build
INIT_XMM sse2
SCALARPRODUCT
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
; int32_t max, unsigned int len)
;-----------------------------------------------------------------------------
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;      (optional: the macro accepts 4 or 5 arguments; it is appended to the
;      function name given to cglobal)
;
; Register roles inside the macro: m4 = min and m5 = max, each broadcast with
; SPLATD; m0-m3 (plus m7-m10 when %3=1) hold the data; m6 is handed to CLIPD
; as its fourth operand (presumably a scratch register -- the CLIPD_* macros
; are defined elsewhere).
;
; NOTE(review): one asm loop handles mmsize*4*%2*(%3+1) bytes, while the
; pointers advance by mmsize*4*(%2+%3). The two agree only when %3=0 or %2=1,
; which is true for every instantiation below; a combination such as
; %2=2, %3=1 would need the advance reworked.
; NOTE(review): the loop body runs before len is tested and len drops by a
; fixed block per iteration, so len is presumably expected to be a positive
; multiple of that block; mova presumably also requires src/dst to be
; mmsize-aligned in the XMM builds -- confirm both with the callers.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
; float flavour of CLIPD: convert the integer bounds to single precision
cvtsi2ss m4, minm
cvtsi2ss m5, maxm
%else
; integer flavour of CLIPD: load the bounds unchanged
movd m4, minm
movd m5, maxm
%endif
; replicate each bound into every dword lane
SPLATD m4
SPLATD m5
.loop:
; %%i is the register-sized offset of the current inline block
%assign %%i 0
%rep %2
; load four registers (eight when %3=1) worth of input
mova m0, [srcq+mmsize*(0+%%i)]
mova m1, [srcq+mmsize*(1+%%i)]
mova m2, [srcq+mmsize*(2+%%i)]
mova m3, [srcq+mmsize*(3+%%i)]
%if %3
mova m7, [srcq+mmsize*(4+%%i)]
mova m8, [srcq+mmsize*(5+%%i)]
mova m9, [srcq+mmsize*(6+%%i)]
mova m10, [srcq+mmsize*(7+%%i)]
%endif
; clip every loaded register against m4/m5
CLIPD m0, m4, m5, m6
CLIPD m1, m4, m5, m6
CLIPD m2, m4, m5, m6
CLIPD m3, m4, m5, m6
%if %3
CLIPD m7, m4, m5, m6
CLIPD m8, m4, m5, m6
CLIPD m9, m4, m5, m6
CLIPD m10, m4, m5, m6
%endif
; store the results at the same offsets in dst
mova [dstq+mmsize*(0+%%i)], m0
mova [dstq+mmsize*(1+%%i)], m1
mova [dstq+mmsize*(2+%%i)], m2
mova [dstq+mmsize*(3+%%i)], m3
%if %3
mova [dstq+mmsize*(4+%%i)], m7
mova [dstq+mmsize*(5+%%i)], m8
mova [dstq+mmsize*(6+%%i)], m9
mova [dstq+mmsize*(7+%%i)], m10
%endif
; step to the next inline block: 4 registers, or 8 when %3=1
%assign %%i %%i+4*(%3+1)
%endrep
; advance both pointers in bytes and len in int32 elements (bytes / 4),
; then repeat while elements remain
add srcq, mmsize*4*(%2+%3)
add dstq, mmsize*4*(%2+%3)
sub lend, mmsize*(%2+%3)
jg .loop
REP_RET
%endmacro
; MMX build: integer bounds, one 4-register block per loop
INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
; SSE2 "_int" build: CLIPD is still CLIPD_MMX at this point (it is only
; redefined on the line after this instantiation), so the bounds stay
; integers; declared on the C side as ff_vector_clip_int32_int_sse2
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
; plain SSE2 build: CLIPD_SSE2 with float bounds (%4=1), two 4-register
; blocks per loop
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
; SSE4 build: CLIPD_SSE41 with integer bounds
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
; the 8-register body uses m7-m10, so it is only assembled when m8 exists
; (presumably only on targets with more than 8 vector registers -- confirm)
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
; %1 = aligned/unaligned ; %1 = aligned/unaligned
%macro BSWAP_LOOPS 1 %macro BSWAP_LOOPS 1
mov r3, r2 mov r3, r2
......
...@@ -29,23 +29,9 @@ ...@@ -29,23 +29,9 @@
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "idct_xvid.h" #include "idct_xvid.h"
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
int order);
void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth) int cpu_flags, unsigned high_bit_depth)
{ {
...@@ -81,7 +67,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, ...@@ -81,7 +67,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_MMX_INLINE */ #endif /* HAVE_MMX_INLINE */
#if HAVE_MMX_EXTERNAL #if HAVE_MMX_EXTERNAL
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
#endif /* HAVE_MMX_EXTERNAL */ #endif /* HAVE_MMX_EXTERNAL */
} }
...@@ -96,19 +81,12 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, ...@@ -96,19 +81,12 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
c->idct = ff_idct_xvid_mmxext; c->idct = ff_idct_xvid_mmxext;
} }
#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_MMXEXT_INLINE */
#if HAVE_MMXEXT_EXTERNAL
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
#endif /* HAVE_MMXEXT_EXTERNAL */
} }
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_YASM #if HAVE_YASM
#if HAVE_SSE_EXTERNAL
c->vector_clipf = ff_vector_clipf_sse;
#endif
#if HAVE_INLINE_ASM && CONFIG_VIDEODSP #if HAVE_INLINE_ASM && CONFIG_VIDEODSP
c->gmc = ff_gmc_sse; c->gmc = ff_gmc_sse;
#endif #endif
...@@ -128,12 +106,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, ...@@ -128,12 +106,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_SSE2_INLINE */ #endif /* HAVE_SSE2_INLINE */
#if HAVE_SSE2_EXTERNAL #if HAVE_SSE2_EXTERNAL
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
if (cpu_flags & AV_CPU_FLAG_ATOM) {
c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
} else {
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
}
c->bswap_buf = ff_bswap32_buf_sse2; c->bswap_buf = ff_bswap32_buf_sse2;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
#endif /* HAVE_SSE2_EXTERNAL */ #endif /* HAVE_SSE2_EXTERNAL */
...@@ -147,14 +119,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, ...@@ -147,14 +119,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_SSSE3_EXTERNAL */ #endif /* HAVE_SSSE3_EXTERNAL */
} }
/**
 * SSE4 stage of the x86 DSPContext setup; ff_dsputil_init_x86() calls it
 * when EXTERNAL_SSE4(cpu_flags) holds.
 *
 * When the build defines HAVE_SSE4_EXTERNAL as non-zero, this points
 * c->vector_clip_int32 at ff_vector_clip_int32_sse4; otherwise the body is
 * empty. avctx, cpu_flags and high_bit_depth are accepted but not used here.
 */
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth)
{
#if HAVE_SSE4_EXTERNAL
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
#endif /* HAVE_SSE4_EXTERNAL */
}
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth) unsigned high_bit_depth)
{ {
...@@ -175,9 +139,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, ...@@ -175,9 +139,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
if (EXTERNAL_SSSE3(cpu_flags)) if (EXTERNAL_SSSE3(cpu_flags))
dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);
if (EXTERNAL_SSE4(cpu_flags))
dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);
if (CONFIG_ENCODERS) if (CONFIG_ENCODERS)
ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
} }
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#include "libavutil/x86/asm.h" #include "libavutil/x86/asm.h"
#include "libavcodec/pixels.h" #include "libavcodec/pixels.h"
#include "libavcodec/videodsp.h" #include "libavcodec/videodsp.h"
#include "constants.h"
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "inline_asm.h" #include "inline_asm.h"
......
...@@ -53,10 +53,6 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src, ...@@ -53,10 +53,6 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src,
int dxx, int dxy, int dyx, int dyy, int dxx, int dxy, int dyx, int dyy,
int shift, int r, int width, int height); int shift, int r, int width, int height);
void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len);
void ff_mmx_idct(int16_t *block); void ff_mmx_idct(int16_t *block);
void ff_mmxext_idct(int16_t *block); void ff_mmxext_idct(int16_t *block);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment