Commit 915a2a0a authored by Diego Biurrun

x86: conditionally compile H.264 QPEL optimizations

parent 3816642e
...@@ -1165,6 +1165,7 @@ CONFIG_EXTRA=" ...@@ -1165,6 +1165,7 @@ CONFIG_EXTRA="
h264chroma h264chroma
h264dsp h264dsp
h264pred h264pred
h264qpel
huffman huffman
lgplv3 lgplv3
lpc lpc
...@@ -1311,7 +1312,7 @@ h263_encoder_select="aandct" ...@@ -1311,7 +1312,7 @@ h263_encoder_select="aandct"
h263_vaapi_hwaccel_select="vaapi h263_decoder" h263_vaapi_hwaccel_select="vaapi h263_decoder"
h263i_decoder_select="h263_decoder" h263i_decoder_select="h263_decoder"
h263p_encoder_select="h263_encoder" h263p_encoder_select="h263_encoder"
h264_decoder_select="golomb h264chroma h264dsp h264pred" h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
h264_dxva2_hwaccel_deps="dxva2api_h" h264_dxva2_hwaccel_deps="dxva2api_h"
h264_dxva2_hwaccel_select="dxva2 h264_decoder" h264_dxva2_hwaccel_select="dxva2 h264_decoder"
h264_vaapi_hwaccel_select="vaapi h264_decoder" h264_vaapi_hwaccel_select="vaapi h264_decoder"
...@@ -1366,14 +1367,14 @@ rv10_decoder_select="h263_decoder" ...@@ -1366,14 +1367,14 @@ rv10_decoder_select="h263_decoder"
rv10_encoder_select="h263_encoder" rv10_encoder_select="h263_encoder"
rv20_decoder_select="h263_decoder" rv20_decoder_select="h263_decoder"
rv20_encoder_select="h263_encoder" rv20_encoder_select="h263_encoder"
rv30_decoder_select="golomb h264chroma h264pred" rv30_decoder_select="golomb h264chroma h264pred h264qpel"
rv40_decoder_select="golomb h264chroma h264pred" rv40_decoder_select="golomb h264chroma h264pred h264qpel"
shorten_decoder_select="golomb" shorten_decoder_select="golomb"
sipr_decoder_select="lsp" sipr_decoder_select="lsp"
snow_decoder_select="dwt" snow_decoder_select="dwt"
snow_encoder_select="aandct dwt" snow_encoder_select="aandct dwt"
svq1_encoder_select="aandct" svq1_encoder_select="aandct"
svq3_decoder_select="golomb h264chroma h264dsp h264pred" svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
svq3_decoder_suggest="zlib" svq3_decoder_suggest="zlib"
theora_decoder_select="vp3_decoder" theora_decoder_select="vp3_decoder"
tiff_decoder_suggest="zlib" tiff_decoder_suggest="zlib"
...@@ -1381,7 +1382,7 @@ tiff_encoder_suggest="zlib" ...@@ -1381,7 +1382,7 @@ tiff_encoder_suggest="zlib"
truehd_decoder_select="mlp_decoder" truehd_decoder_select="mlp_decoder"
tscc_decoder_select="zlib" tscc_decoder_select="zlib"
twinvq_decoder_select="mdct lsp sinewin" twinvq_decoder_select="mdct lsp sinewin"
vc1_decoder_select="h263_decoder h264chroma" vc1_decoder_select="h263_decoder h264chroma h264qpel"
vc1_dxva2_hwaccel_deps="dxva2api_h" vc1_dxva2_hwaccel_deps="dxva2api_h"
vc1_dxva2_hwaccel_select="dxva2 vc1_decoder" vc1_dxva2_hwaccel_select="dxva2 vc1_decoder"
vc1_vaapi_hwaccel_select="vaapi vc1_decoder" vc1_vaapi_hwaccel_select="vaapi vc1_decoder"
...@@ -1392,7 +1393,7 @@ vorbis_encoder_select="mdct" ...@@ -1392,7 +1393,7 @@ vorbis_encoder_select="mdct"
vp6_decoder_select="huffman" vp6_decoder_select="huffman"
vp6a_decoder_select="vp6_decoder" vp6a_decoder_select="vp6_decoder"
vp6f_decoder_select="vp6_decoder" vp6f_decoder_select="vp6_decoder"
vp8_decoder_select="h264pred" vp8_decoder_select="h264pred h264qpel"
wmapro_decoder_select="mdct sinewin" wmapro_decoder_select="mdct sinewin"
wmav1_decoder_select="mdct sinewin" wmav1_decoder_select="mdct sinewin"
wmav1_encoder_select="mdct sinewin" wmav1_encoder_select="mdct sinewin"
...@@ -1419,7 +1420,7 @@ vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads" ...@@ -1419,7 +1420,7 @@ vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h" vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
# parsers # parsers
h264_parser_select="golomb h264chroma h264dsp h264pred" h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel"
# external libraries # external libraries
libdirac_decoder_deps="libdirac !libschroedinger" libdirac_decoder_deps="libdirac !libschroedinger"
......
...@@ -23,6 +23,7 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ ...@@ -23,6 +23,7 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \ YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
x86/h264_intrapred_10bit.o x86/h264_intrapred_10bit.o
MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_10bit.o
MMX-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o MMX-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
...@@ -62,7 +63,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o ...@@ -62,7 +63,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/deinterlace.o \ x86/deinterlace.o \
x86/fmtconvert.o \ x86/fmtconvert.o \
x86/h264_qpel_10bit.o \
$(YASM-OBJS-yes) $(YASM-OBJS-yes)
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
......
...@@ -2479,6 +2479,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, ...@@ -2479,6 +2479,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
} }
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
...@@ -2510,6 +2511,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, ...@@ -2510,6 +2511,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
}
#if HAVE_YASM #if HAVE_YASM
if (!high_bit_depth && CONFIG_H264CHROMA) { if (!high_bit_depth && CONFIG_H264CHROMA) {
...@@ -2577,6 +2579,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, ...@@ -2577,6 +2579,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
} }
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, ); SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, ); SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, ); SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
...@@ -2597,6 +2600,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, ...@@ -2597,6 +2600,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, ); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, ); SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
}
#if HAVE_YASM #if HAVE_YASM
if (!high_bit_depth && CONFIG_H264CHROMA) { if (!high_bit_depth && CONFIG_H264CHROMA) {
...@@ -2671,11 +2675,12 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, ...@@ -2671,11 +2675,12 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
c->put_pixels_tab[0][0] = put_pixels16_sse2; c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2); if (CONFIG_H264QPEL)
H264_QPEL_FUNCS(0, 0, sse2);
} }
} }
if (!high_bit_depth) { if (!high_bit_depth && CONFIG_H264QPEL) {
H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2); H264_QPEL_FUNCS(0, 3, sse2);
...@@ -2692,6 +2697,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, ...@@ -2692,6 +2697,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
#if HAVE_YASM #if HAVE_YASM
if (bit_depth == 10) { if (bit_depth == 10) {
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
...@@ -2699,7 +2705,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, ...@@ -2699,7 +2705,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
H264_QPEL_FUNCS_10(1, 0, sse2_cache64); H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
H264_QPEL_FUNCS_10(2, 0, sse2_cache64); H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
H264_QPEL_FUNCS_10(3, 0, sse2_cache64); H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
}
if (CONFIG_H264CHROMA) { if (CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
...@@ -2729,7 +2735,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, ...@@ -2729,7 +2735,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
const int bit_depth = avctx->bits_per_raw_sample; const int bit_depth = avctx->bits_per_raw_sample;
if (!high_bit_depth) { if (!high_bit_depth && CONFIG_H264QPEL) {
H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3); H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3); H264_QPEL_FUNCS(1, 2, ssse3);
...@@ -2744,7 +2750,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, ...@@ -2744,7 +2750,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
H264_QPEL_FUNCS(3, 3, ssse3); H264_QPEL_FUNCS(3, 3, ssse3);
} }
#if HAVE_YASM #if HAVE_YASM
else if (bit_depth == 10) { else if (bit_depth == 10 && CONFIG_H264QPEL) {
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
...@@ -2788,9 +2794,11 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) ...@@ -2788,9 +2794,11 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
if (bit_depth == 10) { if (bit_depth == 10) {
// AVX implies !cache64. // AVX implies !cache64.
// TODO: Port cache(32|64) detection from x264. // TODO: Port cache(32|64) detection from x264.
if (CONFIG_H264QPEL) {
H264_QPEL_FUNCS_10(1, 0, sse2); H264_QPEL_FUNCS_10(1, 0, sse2);
H264_QPEL_FUNCS_10(2, 0, sse2); H264_QPEL_FUNCS_10(2, 0, sse2);
H264_QPEL_FUNCS_10(3, 0, sse2); H264_QPEL_FUNCS_10(3, 0, sse2);
}
if (CONFIG_H264CHROMA) { if (CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment