Commit 2684d2e3 authored by Michael Niedermayer

Merge commit '284ea790'

* commit '284ea790':
  dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
  aacenc: use the correct output buffer
  aacdec: fix signed overflows in lcg_random()
  base64: fix signed overflow in shift

Conflicts:
	libavcodec/dsputil.c
	libavutil/base64.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 25719620 284ea790
...@@ -819,7 +819,8 @@ static int decode_audio_specific_config(AACContext *ac, ...@@ -819,7 +819,8 @@ static int decode_audio_specific_config(AACContext *ac,
*/ */
static av_always_inline int lcg_random(unsigned previous_val) static av_always_inline int lcg_random(unsigned previous_val)
{ {
return previous_val * 1664525 + 1013904223; union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
return v.s;
} }
static av_always_inline void reset_predict_state(PredictorState *ps) static av_always_inline void reset_predict_state(PredictorState *ps)
...@@ -1394,7 +1395,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1394,7 +1395,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
scale = sf[idx] / sqrtf(band_energy); scale = sf[idx] / sqrtf(band_energy);
ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len); ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
} }
} else { } else {
const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
...@@ -1540,7 +1541,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1540,7 +1541,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
} }
} while (len -= 2); } while (len -= 2);
ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
} }
} }
...@@ -1764,10 +1765,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p ...@@ -1764,10 +1765,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
c *= 1 - 2 * cpe->ms_mask[idx]; c *= 1 - 2 * cpe->ms_mask[idx];
scale = c * sce1->sf[idx]; scale = c * sce1->sf[idx];
for (group = 0; group < ics->group_len[g]; group++) for (group = 0; group < ics->group_len[g]; group++)
ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
coef0 + group * 128 + offsets[i], coef0 + group * 128 + offsets[i],
scale, scale,
offsets[i + 1] - offsets[i]); offsets[i + 1] - offsets[i]);
} }
} else { } else {
int bt_run_end = sce1->band_type_run_end[idx]; int bt_run_end = sce1->band_type_run_end[idx];
......
...@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); ...@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_vector_fmul_window_neon(float *dst, const float *src0, void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len); const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_butterflies_float_neon(float *v1, float *v2, int len); void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0, void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
...@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) ...@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
} }
c->vector_fmul_window = ff_vector_fmul_window_neon; c->vector_fmul_window = ff_vector_fmul_window_neon;
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon; c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon;
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
......
...@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1 ...@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
endfunc endfunc
#endif #endif
/*
 * NEON version of vector_fmul_scalar: multiply every float of a vector by
 * one scalar.  C prototype as declared elsewhere in this change:
 *   void ff_vector_fmul_scalar_neon(float *dst, const float *src,
 *                                   float mul, int len);
 * The code stores through r0 and loads through r1, both with a 128-bit
 * alignment qualifier and post-increment, so r0/r1 act as dst/src and
 * must be 16-byte aligned.  len is consumed in steps of 16 and then 4.
 * NOTE(review): VFP/NOVFP are macros defined outside this view; they
 * presumably select between hard-float and soft-float argument passing
 * (confirm in the shared asm header).
 */
function ff_vector_fmul_scalar_neon, export=1
/* len is in r2 on the VFP path and in r3 on the NOVFP path. */
VFP len .req r2
NOVFP len .req r3
/* Broadcast the multiplier into all four lanes of q8
 * (taken from d0[0] on the VFP path, from r2 on the NOVFP path). */
VFP vdup.32 q8, d0[0]
NOVFP vdup.32 q8, r2
/* r12 = len with the low four bits cleared (whole groups of 16 floats);
 * if that is zero, skip the unrolled loop and go to the tail loop. */
bics r12, len, #15
beq 3f
/* Prime the pipeline: preload the first 8 floats. */
vld1.32 {q0},[r1,:128]!
vld1.32 {q1},[r1,:128]!
/* Main loop: 16 floats per iteration.  Loads, multiplies and stores
 * are interleaved, so instruction order matters here. */
1: vmul.f32 q0, q0, q8
vld1.32 {q2},[r1,:128]!
vmul.f32 q1, q1, q8
vld1.32 {q3},[r1,:128]!
vmul.f32 q2, q2, q8
vst1.32 {q0},[r0,:128]!
vmul.f32 q3, q3, q8
vst1.32 {q1},[r0,:128]!
subs r12, r12, #16
/* No full group left: leave the loop and flush q2/q3 at label 2. */
beq 2f
/* Otherwise preload the next 8 floats while storing q2/q3. */
vld1.32 {q0},[r1,:128]!
vst1.32 {q2},[r0,:128]!
vld1.32 {q1},[r1,:128]!
vst1.32 {q3},[r0,:128]!
b 1b
/* Drain: store the last two result vectors of the unrolled loop. */
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
/* len = len % 16; return right away when nothing remains. */
ands len, len, #15
it eq
bxeq lr
/* Tail loop: 4 floats per iteration until len drops to zero or below.
 * The body runs before len is tested, so entering here with len == 0
 * would still process one vector of four floats; callers are presumably
 * expected to pass len > 0 (TODO confirm). */
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
vst1.32 {q0},[r0,:128]!
subs len, len, #4
bgt 3b
bx lr
.unreq len
endfunc
function ff_butterflies_float_neon, export=1 function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128] 1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128] vld1.32 {q1},[r1,:128]
......
...@@ -2514,14 +2514,6 @@ static void vector_fmul_window_c(float *dst, const float *src0, ...@@ -2514,14 +2514,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
} }
} }
/**
 * Scale a vector of floats by a constant factor (portable C version).
 *
 * Elements are handled in ascending index order, and a len of zero or
 * less leaves dst untouched.
 *
 * @param dst  output vector; dst[k] receives src[k] * mul for k in [0, len)
 * @param src  input vector
 * @param mul  scalar factor applied to every element
 * @param len  number of elements to process
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2, static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
int len) int len)
{ {
...@@ -3039,7 +3031,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -3039,7 +3031,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_float = ff_scalarproduct_float_c; c->scalarproduct_float = ff_scalarproduct_float_c;
c->butterflies_float = butterflies_float_c; c->butterflies_float = butterflies_float_c;
c->butterflies_float_interleave = butterflies_float_interleave_c; c->butterflies_float_interleave = butterflies_float_interleave_c;
c->vector_fmul_scalar = vector_fmul_scalar_c;
c->shrink[0]= av_image_copy_plane; c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22; c->shrink[1]= ff_shrink22;
......
...@@ -392,16 +392,6 @@ typedef struct DSPContext { ...@@ -392,16 +392,6 @@ typedef struct DSPContext {
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len); void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */ /* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
/**
* Multiply a vector of floats by a scalar float. Source and
* destination vectors must overlap exactly or not at all.
* @param dst result vector, 16-byte aligned
* @param src input vector, 16-byte aligned
* @param mul scalar value
* @param len length of vector, multiple of 4
*/
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
/** /**
* Calculate the scalar product of two vectors of floats. * Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned * @param v1 first vector, 16-byte aligned
......
...@@ -28,12 +28,12 @@ ...@@ -28,12 +28,12 @@
#include "libavutil/channel_layout.h" #include "libavutil/channel_layout.h"
#include "libavutil/common.h" #include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "libavutil/log.h" #include "libavutil/log.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "avcodec.h" #include "avcodec.h"
#include "audio_frame_queue.h" #include "audio_frame_queue.h"
#include "dsputil.h"
#include "internal.h" #include "internal.h"
#include "mpegaudio.h" #include "mpegaudio.h"
#include "mpegaudiodecheader.h" #include "mpegaudiodecheader.h"
...@@ -50,7 +50,7 @@ typedef struct LAMEContext { ...@@ -50,7 +50,7 @@ typedef struct LAMEContext {
int reservoir; int reservoir;
float *samples_flt[2]; float *samples_flt[2];
AudioFrameQueue afq; AudioFrameQueue afq;
DSPContext dsp; AVFloatDSPContext fdsp;
} LAMEContext; } LAMEContext;
...@@ -168,7 +168,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx) ...@@ -168,7 +168,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
if (ret < 0) if (ret < 0)
goto error; goto error;
ff_dsputil_init(&s->dsp, avctx); avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
return 0; return 0;
error: error:
...@@ -206,10 +206,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, ...@@ -206,10 +206,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
for (ch = 0; ch < avctx->channels; ch++) { for (ch = 0; ch < avctx->channels; ch++) {
s->dsp.vector_fmul_scalar(s->samples_flt[ch], s->fdsp.vector_fmul_scalar(s->samples_flt[ch],
(const float *)frame->data[ch], (const float *)frame->data[ch],
32768.0f, 32768.0f,
FFALIGN(frame->nb_samples, 8)); FFALIGN(frame->nb_samples, 8));
} }
ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt); ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt);
break; break;
......
...@@ -108,7 +108,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame) ...@@ -108,7 +108,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
for (ch = 0; ch < avctx->channels; ch++) { for (ch = 0; ch < avctx->channels; ch++) {
memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len); s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len);
s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
mdct->mdct_calc(mdct, s->coefs[ch], s->output); mdct->mdct_calc(mdct, s->coefs[ch], s->output);
......
...@@ -86,6 +86,7 @@ ...@@ -86,6 +86,7 @@
* subframe in order to reconstruct the output samples. * subframe in order to reconstruct the output samples.
*/ */
#include "libavutil/float_dsp.h"
#include "libavutil/intfloat.h" #include "libavutil/intfloat.h"
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "avcodec.h" #include "avcodec.h"
...@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx { ...@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
AVCodecContext* avctx; ///< codec context for av_log AVCodecContext* avctx; ///< codec context for av_log
AVFrame frame; ///< AVFrame for decoded output AVFrame frame; ///< AVFrame for decoded output
DSPContext dsp; ///< accelerated DSP functions DSPContext dsp; ///< accelerated DSP functions
AVFloatDSPContext fdsp;
uint8_t frame_data[MAX_FRAMESIZE + uint8_t frame_data[MAX_FRAMESIZE +
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer PutBitContext pb; ///< context for filling the frame_data buffer
...@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx) ...@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->avctx = avctx; s->avctx = avctx;
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
...@@ -1018,12 +1022,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s) ...@@ -1018,12 +1022,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
} }
} else if (s->avctx->channels == 2) { } else if (s->avctx->channels == 2) {
int len = FFMIN(sfb[1], s->subframe_len) - sfb[0]; int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0], s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0],
ch_data[0] + sfb[0], ch_data[0] + sfb[0],
181.0 / 128, len); 181.0 / 128, len);
s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0], s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0],
ch_data[1] + sfb[0], ch_data[1] + sfb[0],
181.0 / 128, len); 181.0 / 128, len);
} }
} }
} }
...@@ -1270,9 +1274,9 @@ static int decode_subframe(WMAProDecodeCtx *s) ...@@ -1270,9 +1274,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
s->channel[c].scale_factor_step; s->channel[c].scale_factor_step;
const float quant = pow(10.0, exp / 20.0); const float quant = pow(10.0, exp / 20.0);
int start = s->cur_sfb_offsets[b]; int start = s->cur_sfb_offsets[b];
s->dsp.vector_fmul_scalar(s->tmp + start, s->fdsp.vector_fmul_scalar(s->tmp + start,
s->channel[c].coeffs + start, s->channel[c].coeffs + start,
quant, end - start); quant, end - start);
} }
/** apply imdct (imdct_half == DCTIV with reverse) */ /** apply imdct (imdct_half == DCTIV with reverse) */
......
...@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l ...@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
int len); int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
int len);
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{ {
fdsp->vector_fmul = ff_vector_fmul_neon; fdsp->vector_fmul = ff_vector_fmul_neon;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
} }
...@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2 ...@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
bx lr bx lr
.unreq len .unreq len
endfunc endfunc
/*
 * NEON version of vector_fmul_scalar: multiply every float of a vector by
 * one scalar.  C prototype as declared elsewhere in this change:
 *   void ff_vector_fmul_scalar_neon(float *dst, const float *src,
 *                                   float mul, int len);
 * The code stores through r0 and loads through r1, both with a 128-bit
 * alignment qualifier and post-increment, so r0/r1 act as dst/src and
 * must be 16-byte aligned.  len is consumed in steps of 16 and then 4.
 * NOTE(review): VFP/NOVFP are macros defined outside this view; they
 * presumably select between hard-float and soft-float argument passing
 * (confirm in the shared asm header).
 */
function ff_vector_fmul_scalar_neon, export=1
/* len is in r2 on the VFP path and in r3 on the NOVFP path. */
VFP len .req r2
NOVFP len .req r3
/* Broadcast the multiplier into all four lanes of q8
 * (taken from d0[0] on the VFP path, from r2 on the NOVFP path). */
VFP vdup.32 q8, d0[0]
NOVFP vdup.32 q8, r2
/* r12 = len with the low four bits cleared (whole groups of 16 floats);
 * if that is zero, skip the unrolled loop and go to the tail loop. */
bics r12, len, #15
beq 3f
/* Prime the pipeline: preload the first 8 floats. */
vld1.32 {q0},[r1,:128]!
vld1.32 {q1},[r1,:128]!
/* Main loop: 16 floats per iteration.  Loads, multiplies and stores
 * are interleaved, so instruction order matters here. */
1: vmul.f32 q0, q0, q8
vld1.32 {q2},[r1,:128]!
vmul.f32 q1, q1, q8
vld1.32 {q3},[r1,:128]!
vmul.f32 q2, q2, q8
vst1.32 {q0},[r0,:128]!
vmul.f32 q3, q3, q8
vst1.32 {q1},[r0,:128]!
subs r12, r12, #16
/* No full group left: leave the loop and flush q2/q3 at label 2. */
beq 2f
/* Otherwise preload the next 8 floats while storing q2/q3. */
vld1.32 {q0},[r1,:128]!
vst1.32 {q2},[r0,:128]!
vld1.32 {q1},[r1,:128]!
vst1.32 {q3},[r0,:128]!
b 1b
/* Drain: store the last two result vectors of the unrolled loop. */
2: vst1.32 {q2},[r0,:128]!
vst1.32 {q3},[r0,:128]!
/* len = len % 16; return right away when nothing remains. */
ands len, len, #15
it eq
bxeq lr
/* Tail loop: 4 floats per iteration until len drops to zero or below.
 * The body runs before len is tested, so entering here with len == 0
 * would still process one vector of four floats; callers are presumably
 * expected to pass len > 0 (TODO confirm). */
3: vld1.32 {q0},[r1,:128]!
vmul.f32 q0, q0, q8
vst1.32 {q0},[r0,:128]!
subs len, len, #4
bgt 3b
bx lr
.unreq len
endfunc
...@@ -39,10 +39,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul, ...@@ -39,10 +39,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
dst[i] += src[i] * mul; dst[i] += src[i] * mul;
} }
/**
 * Scale a vector of floats by a constant factor (portable C version).
 *
 * Elements are handled in ascending index order, and a len of zero or
 * less leaves dst untouched.
 *
 * @param dst  output vector; dst[k] receives src[k] * mul for k in [0, len)
 * @param src  input vector
 * @param mul  scalar factor applied to every element
 * @param len  number of elements to process
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{ {
fdsp->vector_fmul = vector_fmul_c; fdsp->vector_fmul = vector_fmul_c;
fdsp->vector_fmac_scalar = vector_fmac_scalar_c; fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
#if ARCH_ARM #if ARCH_ARM
ff_float_dsp_init_arm(fdsp); ff_float_dsp_init_arm(fdsp);
......
...@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext { ...@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
*/ */
void (*vector_fmac_scalar)(float *dst, const float *src, float mul, void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
int len); int len);
/**
* Multiply a vector of floats by a scalar float. Source and
* destination vectors must overlap exactly or not at all.
*
* @param dst result vector
* constraints: 16-byte aligned
* @param src input vector
* constraints: 16-byte aligned
* @param mul scalar value
* @param len length of vector
* constraints: multiple of 4
*/
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
} AVFloatDSPContext; } AVFloatDSPContext;
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment