Commit a7acab6c authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  vc1dec: Remove separate scaling function for interlaced field MVs
  vc1dec: Invoke edge_emulation regardless of MV precision
  x86: Use consistent 3dnowext function and macro name suffixes
  g723_1: scale output as supposed for the case with postfilter disabled
  g723_1: increase excitation storage by 4
  g723_1: fix upper bound parameter from inverse maximum autocorrelation
  g723_1: make scale_vector() behave like the reference
  g723_1: fix off-by-one error in normalize_bits()
  g723_1: save/restore excitation with offset to store LPC history
  wmapro: prevent division by zero when sample rate is unspecified
  x86: proresdsp: improve SIGNEXTEND macro comments
  x86: h264dsp: K&R formatting cosmetics
  LICENSE: Document all GPL files

Conflicts:
	libavcodec/g723_1.c
	libavcodec/wmaprodec.c
	libavcodec/x86/h264dsp_mmx.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents a763cafc 9cc74c9f
...@@ -17,6 +17,14 @@ Specifically, the GPL parts of FFmpeg are ...@@ -17,6 +17,14 @@ Specifically, the GPL parts of FFmpeg are
- optional x86 optimizations in the files - optional x86 optimizations in the files
libavcodec/x86/idct_mmx.c libavcodec/x86/idct_mmx.c
- the X11 grabber in libavdevice/x11grab.c - the X11 grabber in libavdevice/x11grab.c
- the texi2pod.pl tool
- the following filters in libavfilter:
- vf_blackframe.c
- vf_boxblur.c
- vf_cropdetect.c
- vf_delogo.c
- vf_hqdn3d.c
- vf_yadif.c
There are a handful of files under other licensing terms, namely: There are a handful of files under other licensing terms, namely:
......
...@@ -52,7 +52,7 @@ typedef struct g723_1_context { ...@@ -52,7 +52,7 @@ typedef struct g723_1_context {
int16_t prev_lsp[LPC_ORDER]; int16_t prev_lsp[LPC_ORDER];
int16_t prev_excitation[PITCH_MAX]; int16_t prev_excitation[PITCH_MAX];
int16_t excitation[PITCH_MAX + FRAME_LEN]; int16_t excitation[PITCH_MAX + FRAME_LEN + 4];
int16_t synth_mem[LPC_ORDER]; int16_t synth_mem[LPC_ORDER];
int16_t fir_mem[LPC_ORDER]; int16_t fir_mem[LPC_ORDER];
int iir_mem[LPC_ORDER]; int iir_mem[LPC_ORDER];
...@@ -267,8 +267,10 @@ static int scale_vector(int16_t *vector, int length) ...@@ -267,8 +267,10 @@ static int scale_vector(int16_t *vector, int length)
bits = normalize_bits(max, 15); bits = normalize_bits(max, 15);
scale = shift_table[bits]; scale = shift_table[bits];
for (i = 0; i < length; i++) for (i = 0; i < length; i++) {
av_assert2(av_clipl_int32(vector[i] * (int64_t)scale << 1) == vector[i] * (int64_t)scale << 1);
vector[i] = (vector[i] * scale) >> 3; vector[i] = (vector[i] * scale) >> 3;
}
return bits - 3; return bits - 3;
} }
...@@ -592,7 +594,10 @@ static int autocorr_max(G723_1_Context *p, int offset, int *ccr_max, ...@@ -592,7 +594,10 @@ static int autocorr_max(G723_1_Context *p, int offset, int *ccr_max,
int i; int i;
pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag); pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag);
limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3); if (dir > 0)
limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3);
else
limit = pitch_lag + 3;
for (i = pitch_lag - 3; i <= limit; i++) { for (i = pitch_lag - 3; i <= limit; i++) {
ccr = ff_dot_product(buf, buf + dir * i, length)<<1; ccr = ff_dot_product(buf, buf + dir * i, length)<<1;
...@@ -967,7 +972,6 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -967,7 +972,6 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
G723_1_Context *p = avctx->priv_data; G723_1_Context *p = avctx->priv_data;
const uint8_t *buf = avpkt->data; const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
int16_t *out;
int dec_mode = buf[0] & 3; int dec_mode = buf[0] & 3;
PPFParam ppf[SUBFRAMES]; PPFParam ppf[SUBFRAMES];
...@@ -975,6 +979,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -975,6 +979,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
int16_t lpc[SUBFRAMES * LPC_ORDER]; int16_t lpc[SUBFRAMES * LPC_ORDER];
int16_t acb_vector[SUBFRAME_LEN]; int16_t acb_vector[SUBFRAME_LEN];
int16_t *vector_ptr; int16_t *vector_ptr;
int16_t *out;
int bad_frame = 0, i, j, ret; int bad_frame = 0, i, j, ret;
if (!buf_size || buf_size < frame_size[dec_mode]) { if (!buf_size || buf_size < frame_size[dec_mode]) {
...@@ -995,8 +1000,8 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -995,8 +1000,8 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return ret; return ret;
} }
out= (int16_t*)p->frame.data[0];
out = (int16_t *)p->frame.data[0];
if (p->cur_frame_type == ACTIVE_FRAME) { if (p->cur_frame_type == ACTIVE_FRAME) {
if (!bad_frame) if (!bad_frame)
...@@ -1079,7 +1084,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -1079,7 +1084,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
memcpy(p->prev_excitation, p->excitation + FRAME_LEN, memcpy(p->prev_excitation, p->excitation + FRAME_LEN,
PITCH_MAX * sizeof(*p->excitation)); PITCH_MAX * sizeof(*p->excitation));
} else { } else {
memset(out, 0, sizeof(int16_t)*FRAME_LEN); memset(out, 0, FRAME_LEN * 2);
av_log(avctx, AV_LOG_WARNING, av_log(avctx, AV_LOG_WARNING,
"G.723.1: Comfort noise generation not supported yet\n"); "G.723.1: Comfort noise generation not supported yet\n");
return frame_size[dec_mode]; return frame_size[dec_mode];
...@@ -1094,13 +1099,18 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -1094,13 +1099,18 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
0, 1, 1 << 12); 0, 1, 1 << 12);
memcpy(p->synth_mem, out + FRAME_LEN, LPC_ORDER * sizeof(int16_t)); memcpy(p->synth_mem, out + FRAME_LEN, LPC_ORDER * sizeof(int16_t));
if (p->postfilter) if (p->postfilter) {
formant_postfilter(p, lpc, out); formant_postfilter(p, lpc, out);
} else { // if output is not postfiltered it should be scaled by 2
for (i = 0; i < FRAME_LEN; i++)
out[LPC_ORDER + i] = av_clip_int16(out[LPC_ORDER + i] << 1);
}
memmove(out, out + LPC_ORDER, sizeof(int16_t)*FRAME_LEN); memmove(out, out + LPC_ORDER, sizeof(int16_t)*FRAME_LEN);
p->frame.nb_samples = FRAME_LEN; p->frame.nb_samples = FRAME_LEN;
*(AVFrame*)data = p->frame;
*got_frame_ptr = 1; *got_frame_ptr = 1;
*(AVFrame *)data = p->frame;
return frame_size[dec_mode]; return frame_size[dec_mode];
} }
......
...@@ -1882,8 +1882,8 @@ static void vc1_interp_mc(VC1Context *v) ...@@ -1882,8 +1882,8 @@ static void vc1_interp_mc(VC1Context *v)
} }
if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22
|| (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 16 - s->mspel * 3 || (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3
|| (unsigned)(src_y - s->mspel) > v_edge_pos - (my & 3) - 16 - s->mspel * 3) { || (unsigned)(src_y - 1) > v_edge_pos - (my & 3) - 16 - 3) {
uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize; uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
srcY -= s->mspel * (1 + s->linesize); srcY -= s->mspel * (1 + s->linesize);
...@@ -1979,20 +1979,6 @@ static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs) ...@@ -1979,20 +1979,6 @@ static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs)
#endif #endif
} }
static av_always_inline int scale_mv_intfi(int value, int bfrac, int inv,
int qs, int qs_last)
{
int n = bfrac;
if (inv)
n -= 256;
n <<= !qs_last;
if (!qs)
return (value * n + 255) >> 9;
else
return (value * n + 128) >> 8;
}
/** Reconstruct motion vector for B-frame and do motion compensation /** Reconstruct motion vector for B-frame and do motion compensation
*/ */
static inline void vc1_b_mc(VC1Context *v, int dmv_x[2], int dmv_y[2], static inline void vc1_b_mc(VC1Context *v, int dmv_x[2], int dmv_y[2],
...@@ -2246,14 +2232,14 @@ static inline void vc1_pred_b_mv_intfi(VC1Context *v, int n, int *dmv_x, int *dm ...@@ -2246,14 +2232,14 @@ static inline void vc1_pred_b_mv_intfi(VC1Context *v, int n, int *dmv_x, int *dm
if (v->bmvtype == BMV_TYPE_DIRECT) { if (v->bmvtype == BMV_TYPE_DIRECT) {
int total_opp, k, f; int total_opp, k, f;
if (s->next_picture.f.mb_type[mb_pos + v->mb_off] != MB_TYPE_INTRA) { if (s->next_picture.f.mb_type[mb_pos + v->mb_off] != MB_TYPE_INTRA) {
s->mv[0][0][0] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], s->mv[0][0][0] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0],
v->bfraction, 0, s->quarter_sample, v->qs_last); v->bfraction, 0, s->quarter_sample);
s->mv[0][0][1] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], s->mv[0][0][1] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1],
v->bfraction, 0, s->quarter_sample, v->qs_last); v->bfraction, 0, s->quarter_sample);
s->mv[1][0][0] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], s->mv[1][0][0] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0],
v->bfraction, 1, s->quarter_sample, v->qs_last); v->bfraction, 1, s->quarter_sample);
s->mv[1][0][1] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], s->mv[1][0][1] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1],
v->bfraction, 1, s->quarter_sample, v->qs_last); v->bfraction, 1, s->quarter_sample);
total_opp = v->mv_f_next[0][s->block_index[0] + v->blocks_off] total_opp = v->mv_f_next[0][s->block_index[0] + v->blocks_off]
+ v->mv_f_next[0][s->block_index[1] + v->blocks_off] + v->mv_f_next[0][s->block_index[1] + v->blocks_off]
......
...@@ -341,6 +341,11 @@ static av_cold int decode_init(AVCodecContext *avctx) ...@@ -341,6 +341,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
if (s->avctx->sample_rate <= 0) {
av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
return AVERROR_INVALIDDATA;
}
s->num_channels = avctx->channels; s->num_channels = avctx->channels;
if (s->num_channels < 0) { if (s->num_channels < 0) {
......
...@@ -2485,9 +2485,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], ...@@ -2485,9 +2485,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2],
} }
#if HAVE_6REGS #if HAVE_6REGS
static void vector_fmul_window_3dnow2(float *dst, const float *src0, static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win, const float *src1, const float *win,
int len) int len)
{ {
x86_reg i = -len * 4; x86_reg i = -len * 4;
x86_reg j = len * 4 - 8; x86_reg j = len * 4 - 8;
...@@ -2939,11 +2939,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, ...@@ -2939,11 +2939,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
#endif #endif
} }
static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx,
int mm_flags) int mm_flags)
{ {
#if HAVE_6REGS && HAVE_INLINE_ASM #if HAVE_6REGS && HAVE_INLINE_ASM
c->vector_fmul_window = vector_fmul_window_3dnow2; c->vector_fmul_window = vector_fmul_window_3dnowext;
#endif #endif
} }
...@@ -3194,7 +3194,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) ...@@ -3194,7 +3194,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
dsputil_init_3dnow(c, avctx, mm_flags); dsputil_init_3dnow(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT)
dsputil_init_3dnow2(c, avctx, mm_flags); dsputil_init_3dnowext(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE)
dsputil_init_sse(c, avctx, mm_flags); dsputil_init_sse(c, avctx, mm_flags);
......
...@@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s) ...@@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
} }
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
/* 3DNowEx for K7 */ /* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dnow2; s->imdct_calc = ff_imdct_calc_3dnowext;
s->imdct_half = ff_imdct_half_3dnow2; s->imdct_half = ff_imdct_half_3dnowext;
s->fft_calc = ff_fft_calc_3dnow2; s->fft_calc = ff_fft_calc_3dnowext;
} }
#endif #endif
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
......
...@@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); ...@@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
......
...@@ -93,14 +93,14 @@ cextern cos_ %+ i ...@@ -93,14 +93,14 @@ cextern cos_ %+ i
SECTION_TEXT SECTION_TEXT
%macro T2_3DN 4 ; z0, z1, mem0, mem1 %macro T2_3DNOW 4 ; z0, z1, mem0, mem1
mova %1, %3 mova %1, %3
mova %2, %1 mova %2, %1
pfadd %1, %4 pfadd %1, %4
pfsub %2, %4 pfsub %2, %4
%endmacro %endmacro
%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1 %macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1
mova %5, %3 mova %5, %3
pfsub %3, %4 pfsub %3, %4
pfadd %5, %4 ; {t6,t5} pfadd %5, %4 ; {t6,t5}
...@@ -445,13 +445,13 @@ fft16_sse: ...@@ -445,13 +445,13 @@ fft16_sse:
ret ret
%macro FFT48_3DN 0 %macro FFT48_3DNOW 0
align 16 align 16
fft4 %+ SUFFIX: fft4 %+ SUFFIX:
T2_3DN m0, m1, Z(0), Z(1) T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2) mova m2, Z(2)
mova m3, Z(3) mova m3, Z(3)
T4_3DN m0, m1, m2, m3, m4, m5 T4_3DNOW m0, m1, m2, m3, m4, m5
PUNPCK m0, m1, m4 PUNPCK m0, m1, m4
PUNPCK m2, m3, m5 PUNPCK m2, m3, m5
mova Z(0), m0 mova Z(0), m0
...@@ -462,14 +462,14 @@ fft4 %+ SUFFIX: ...@@ -462,14 +462,14 @@ fft4 %+ SUFFIX:
align 16 align 16
fft8 %+ SUFFIX: fft8 %+ SUFFIX:
T2_3DN m0, m1, Z(0), Z(1) T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2) mova m2, Z(2)
mova m3, Z(3) mova m3, Z(3)
T4_3DN m0, m1, m2, m3, m4, m5 T4_3DNOW m0, m1, m2, m3, m4, m5
mova Z(0), m0 mova Z(0), m0
mova Z(2), m2 mova Z(2), m2
T2_3DN m4, m5, Z(4), Z(5) T2_3DNOW m4, m5, Z(4), Z(5)
T2_3DN m6, m7, Z2(6), Z2(7) T2_3DNOW m6, m7, Z2(6), Z2(7)
PSWAPD m0, m5 PSWAPD m0, m5
PSWAPD m2, m7 PSWAPD m2, m7
pxor m0, [ps_m1p1] pxor m0, [ps_m1p1]
...@@ -478,12 +478,12 @@ fft8 %+ SUFFIX: ...@@ -478,12 +478,12 @@ fft8 %+ SUFFIX:
pfadd m7, m2 pfadd m7, m2
pfmul m5, [ps_root2] pfmul m5, [ps_root2]
pfmul m7, [ps_root2] pfmul m7, [ps_root2]
T4_3DN m1, m3, m5, m7, m0, m2 T4_3DNOW m1, m3, m5, m7, m0, m2
mova Z(5), m5 mova Z(5), m5
mova Z2(7), m7 mova Z2(7), m7
mova m0, Z(0) mova m0, Z(0)
mova m2, Z(2) mova m2, Z(2)
T4_3DN m0, m2, m4, m6, m5, m7 T4_3DNOW m0, m2, m4, m6, m5, m7
PUNPCK m0, m1, m5 PUNPCK m0, m1, m5
PUNPCK m2, m3, m7 PUNPCK m2, m3, m7
mova Z(0), m0 mova Z(0), m0
...@@ -501,7 +501,7 @@ fft8 %+ SUFFIX: ...@@ -501,7 +501,7 @@ fft8 %+ SUFFIX:
%if ARCH_X86_32 %if ARCH_X86_32
%macro PSWAPD 2 %macro PSWAPD 2
%if cpuflag(3dnow2) %if cpuflag(3dnowext)
pswapd %1, %2 pswapd %1, %2
%elifidn %1, %2 %elifidn %1, %2
movd [r0+12], %1 movd [r0+12], %1
...@@ -513,11 +513,11 @@ fft8 %+ SUFFIX: ...@@ -513,11 +513,11 @@ fft8 %+ SUFFIX:
%endif %endif
%endmacro %endmacro
INIT_MMX 3dnow2 INIT_MMX 3dnowext
FFT48_3DN FFT48_3DNOW
INIT_MMX 3dnow INIT_MMX 3dnow
FFT48_3DN FFT48_3DNOW
%endif %endif
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
...@@ -634,7 +634,7 @@ cglobal fft_calc, 2,5,8 ...@@ -634,7 +634,7 @@ cglobal fft_calc, 2,5,8
%if ARCH_X86_32 %if ARCH_X86_32
INIT_MMX 3dnow INIT_MMX 3dnow
FFT_CALC_FUNC FFT_CALC_FUNC
INIT_MMX 3dnow2 INIT_MMX 3dnowext
FFT_CALC_FUNC FFT_CALC_FUNC
%endif %endif
INIT_XMM sse INIT_XMM sse
...@@ -728,7 +728,7 @@ cglobal imdct_calc, 3,5,3 ...@@ -728,7 +728,7 @@ cglobal imdct_calc, 3,5,3
%if ARCH_X86_32 %if ARCH_X86_32
INIT_MMX 3dnow INIT_MMX 3dnow
IMDCT_CALC_FUNC IMDCT_CALC_FUNC
INIT_MMX 3dnow2 INIT_MMX 3dnowext
IMDCT_CALC_FUNC IMDCT_CALC_FUNC
%endif %endif
...@@ -744,8 +744,8 @@ INIT_MMX 3dnow ...@@ -744,8 +744,8 @@ INIT_MMX 3dnow
%define unpckhps punpckhdq %define unpckhps punpckhdq
DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q]
DECL_PASS pass_interleave_3dnow, PASS_BIG 0 DECL_PASS pass_interleave_3dnow, PASS_BIG 0
%define pass_3dnow2 pass_3dnow %define pass_3dnowext pass_3dnow
%define pass_interleave_3dnow2 pass_interleave_3dnow %define pass_interleave_3dnowext pass_interleave_3dnow
%endif %endif
%ifdef PIC %ifdef PIC
...@@ -814,7 +814,7 @@ DECL_FFT 5, _interleave ...@@ -814,7 +814,7 @@ DECL_FFT 5, _interleave
INIT_MMX 3dnow INIT_MMX 3dnow
DECL_FFT 4 DECL_FFT 4
DECL_FFT 4, _interleave DECL_FFT 4, _interleave
INIT_MMX 3dnow2 INIT_MMX 3dnowext
DECL_FFT 4 DECL_FFT 4
DECL_FFT 4, _interleave DECL_FFT 4, _interleave
%endif %endif
...@@ -846,7 +846,7 @@ INIT_XMM sse ...@@ -846,7 +846,7 @@ INIT_XMM sse
PSWAPD m5, m3 PSWAPD m5, m3
pfmul m2, m3 pfmul m2, m3
pfmul m6, m5 pfmul m6, m5
%if cpuflag(3dnow2) %if cpuflag(3dnowext)
pfpnacc m0, m4 pfpnacc m0, m4
pfpnacc m2, m6 pfpnacc m2, m6
%else %else
...@@ -1019,7 +1019,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i ...@@ -1019,7 +1019,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
xor r4, r4 xor r4, r4
sub r4, r3 sub r4, r3
%endif %endif
%if notcpuflag(3dnow2) && mmsize == 8 %if notcpuflag(3dnowext) && mmsize == 8
movd m7, [ps_m1m1m1m1] movd m7, [ps_m1m1m1m1]
%endif %endif
.pre: .pre:
...@@ -1103,7 +1103,7 @@ DECL_IMDCT POSROTATESHUF ...@@ -1103,7 +1103,7 @@ DECL_IMDCT POSROTATESHUF
INIT_MMX 3dnow INIT_MMX 3dnow
DECL_IMDCT POSROTATESHUF_3DNOW DECL_IMDCT POSROTATESHUF_3DNOW
INIT_MMX 3dnow2 INIT_MMX 3dnowext
DECL_IMDCT POSROTATESHUF_3DNOW DECL_IMDCT POSROTATESHUF_3DNOW
%endif %endif
......
...@@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 ...@@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2 %macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e pshufw %1, %2, 0x4e
%endmacro %endmacro
%macro PSWAPD_3DN1 2 %macro PSWAPD_3DNOW 2
movq %1, %2 movq %1, %2
psrlq %1, 32 psrlq %1, 32
punpckldq %1, %2 punpckldq %1, %2
...@@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, ...@@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
%define pswapd PSWAPD_SSE %define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse FLOAT_TO_INT16_INTERLEAVE6 sse
%define cvtps2pi pf2id %define cvtps2pi pf2id
%define pswapd PSWAPD_3DN1 %define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 3dnow FLOAT_TO_INT16_INTERLEAVE6 3dnow
%undef pswapd %undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dn2 FLOAT_TO_INT16_INTERLEAVE6 3dnowext
%undef cvtps2pi %undef cvtps2pi
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
......
...@@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l ...@@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
...@@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow) ...@@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse) FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2) FLOAT_TO_INT16_INTERLEAVE(sse2)
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
long len, int channels)
{
if(channels==6) if(channels==6)
ff_float_to_int16_interleave6_3dn2(dst, src, len); ff_float_to_int16_interleave6_3dnowext(dst, src, len);
else else
float_to_int16_interleave_3dnow(dst, src, len, channels); float_to_int16_interleave_3dnow(dst, src, len, channels);
} }
...@@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) ...@@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
} }
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2; c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
} }
} }
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
......
This diff is collapsed.
...@@ -301,12 +301,12 @@ cglobal prores_idct_put_10, 4, 4, %1 ...@@ -301,12 +301,12 @@ cglobal prores_idct_put_10, 4, 4, %1
RET RET
%endmacro %endmacro
%macro SIGNEXTEND 2-3 ; dstlow, dsthigh, tmp %macro SIGNEXTEND 2-3
%if cpuflag(sse4) %if cpuflag(sse4) ; dstlow, dsthigh
movhlps %2, %1 movhlps %2, %1
pmovsxwd %1, %1 pmovsxwd %1, %1
pmovsxwd %2, %2 pmovsxwd %2, %2
%else ; sse2 %elif cpuflag(sse2) ; dstlow, dsthigh, tmp
pxor %3, %3 pxor %3, %3
pcmpgtw %3, %1 pcmpgtw %3, %1
mova %2, %1 mova %2, %1
......
...@@ -590,7 +590,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits ...@@ -590,7 +590,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_mmx (1<<0) %assign cpuflags_mmx (1<<0)
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx %assign cpuflags_3dnow (1<<2) | cpuflags_mmx
%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow %assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
%assign cpuflags_sse (1<<4) | cpuflags_mmx2 %assign cpuflags_sse (1<<4) | cpuflags_mmx2
%assign cpuflags_sse2 (1<<5) | cpuflags_sse %assign cpuflags_sse2 (1<<5) | cpuflags_sse
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
......
dec0deb2425e908d232d2471acff04a3 *tests/data/fate/acodec-g723_1.g723_1 dec0deb2425e908d232d2471acff04a3 *tests/data/fate/acodec-g723_1.g723_1
4800 tests/data/fate/acodec-g723_1.g723_1 4800 tests/data/fate/acodec-g723_1.g723_1
90b20555c962b638dad0e98ac2c05b25 *tests/data/fate/acodec-g723_1.out.wav 87fd529c9e41914f73a865d147cc9516 *tests/data/fate/acodec-g723_1.out.wav
stddev: 8418.34 PSNR: 17.82 MAXDIFF:52968 bytes: 95992/ 96000 stddev: 8425.98 PSNR: 17.82 MAXDIFF:53268 bytes: 95992/ 96000
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment