Commit 5169e688 authored by Diego Biurrun

dsputil: Propagate bit depth information to all (sub)init functions

This avoids recalculating the value over and over again.
parent cf7a2167
...@@ -24,8 +24,11 @@ ...@@ -24,8 +24,11 @@
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx,
void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx); unsigned high_bit_depth);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
#endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */
...@@ -64,14 +64,15 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) ...@@ -64,14 +64,15 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
ff_add_pixels_clamped(block, dest, line_size); ff_add_pixels_clamped(block, dest, line_size);
} }
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
ff_put_pixels_clamped = c->put_pixels_clamped; ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped;
if (avctx->bits_per_raw_sample <= 8) { if (!high_bit_depth) {
if (avctx->idct_algo == FF_IDCT_AUTO || if (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_ARM) { avctx->idct_algo == FF_IDCT_ARM) {
c->idct_put = j_rev_dct_arm_put; c->idct_put = j_rev_dct_arm_put;
...@@ -89,9 +90,9 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) ...@@ -89,9 +90,9 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_arm; c->add_pixels_clamped = ff_add_pixels_clamped_arm;
if (have_armv5te(cpu_flags)) if (have_armv5te(cpu_flags))
ff_dsputil_init_armv5te(c, avctx); ff_dsputil_init_armv5te(c, avctx, high_bit_depth);
if (have_armv6(cpu_flags)) if (have_armv6(cpu_flags))
ff_dsputil_init_armv6(c, avctx); ff_dsputil_init_armv6(c, avctx, high_bit_depth);
if (have_neon(cpu_flags)) if (have_neon(cpu_flags))
ff_dsputil_init_neon(c, avctx); ff_dsputil_init_neon(c, avctx, high_bit_depth);
} }
...@@ -29,9 +29,10 @@ void ff_simple_idct_armv5te(int16_t *data); ...@@ -29,9 +29,10 @@ void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data);
av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
if (avctx->bits_per_raw_sample <= 8 && if (!high_bit_depth &&
(avctx->idct_algo == FF_IDCT_AUTO || (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) {
c->idct_put = ff_simple_idct_put_armv5te; c->idct_put = ff_simple_idct_put_armv5te;
......
...@@ -52,17 +52,17 @@ int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, ...@@ -52,17 +52,17 @@ int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
int ff_pix_norm1_armv6(uint8_t *pix, int line_size); int ff_pix_norm1_armv6(uint8_t *pix, int line_size);
int ff_pix_sum_armv6(uint8_t *pix, int line_size); int ff_pix_sum_armv6(uint8_t *pix, int line_size);
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!high_bit_depth) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
if (avctx->bits_per_raw_sample <= 8 && avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
(avctx->idct_algo == FF_IDCT_AUTO || c->idct_put = ff_simple_idct_put_armv6;
avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { c->idct_add = ff_simple_idct_add_armv6;
c->idct_put = ff_simple_idct_put_armv6; c->idct = ff_simple_idct_armv6;
c->idct_add = ff_simple_idct_add_armv6; c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
c->idct = ff_simple_idct_armv6; }
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
} }
c->add_pixels_clamped = ff_add_pixels_clamped_armv6; c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
......
...@@ -47,11 +47,10 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le ...@@ -47,11 +47,10 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len, int mul); const int16_t *v3, int len, int mul);
av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!high_bit_depth) {
if (avctx->bits_per_raw_sample <= 8) {
if (avctx->idct_algo == FF_IDCT_AUTO || if (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLENEON) { avctx->idct_algo == FF_IDCT_SIMPLENEON) {
c->idct_put = ff_simple_idct_put_neon; c->idct_put = ff_simple_idct_put_neon;
......
...@@ -147,18 +147,14 @@ static int bfin_pix_abs8_xy2(void *c, uint8_t *blk1, uint8_t *blk2, ...@@ -147,18 +147,14 @@ static int bfin_pix_abs8_xy2(void *c, uint8_t *blk1, uint8_t *blk2,
* 2.64s 2/20 same sman.mp4 decode only * 2.64s 2/20 same sman.mp4 decode only
*/ */
av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->diff_pixels = ff_bfin_diff_pixels; c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
if (!high_bit_depth)
c->get_pixels = ff_bfin_get_pixels;
c->clear_blocks = bfin_clear_blocks; c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum; c->pix_sum = ff_bfin_pix_sum;
...@@ -182,7 +178,9 @@ av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) ...@@ -182,7 +178,9 @@ av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx)
c->sse[1] = ff_bfin_sse8; c->sse[1] = ff_bfin_sse8;
c->sse[2] = ff_bfin_sse4; c->sse[2] = ff_bfin_sse4;
if (avctx->bits_per_raw_sample <= 8) { if (!high_bit_depth) {
c->get_pixels = ff_bfin_get_pixels;
if (avctx->dct_algo == FF_DCT_AUTO) if (avctx->dct_algo == FF_DCT_AUTO)
c->fdct = ff_bfin_fdct; c->fdct = ff_bfin_fdct;
......
...@@ -2715,6 +2715,8 @@ av_cold void ff_dsputil_static_init(void) ...@@ -2715,6 +2715,8 @@ av_cold void ff_dsputil_static_init(void)
av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
{ {
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
#if CONFIG_ENCODERS #if CONFIG_ENCODERS
if (avctx->bits_per_raw_sample == 10) { if (avctx->bits_per_raw_sample == 10) {
c->fdct = ff_jpeg_fdct_islow_10; c->fdct = ff_jpeg_fdct_islow_10;
...@@ -2924,13 +2926,13 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ...@@ -2924,13 +2926,13 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
} }
if (ARCH_ARM) if (ARCH_ARM)
ff_dsputil_init_arm(c, avctx); ff_dsputil_init_arm(c, avctx, high_bit_depth);
if (ARCH_BFIN) if (ARCH_BFIN)
ff_dsputil_init_bfin(c, avctx); ff_dsputil_init_bfin(c, avctx, high_bit_depth);
if (ARCH_PPC) if (ARCH_PPC)
ff_dsputil_init_ppc(c, avctx); ff_dsputil_init_ppc(c, avctx, high_bit_depth);
if (ARCH_X86) if (ARCH_X86)
ff_dsputil_init_x86(c, avctx); ff_dsputil_init_x86(c, avctx, high_bit_depth);
ff_init_scantable_permutation(c->idct_permutation, ff_init_scantable_permutation(c->idct_permutation,
c->idct_permutation_type); c->idct_permutation_type);
......
...@@ -347,9 +347,13 @@ void ff_dsputil_init(DSPContext *p, AVCodecContext *avctx); ...@@ -347,9 +347,13 @@ void ff_dsputil_init(DSPContext *p, AVCodecContext *avctx);
void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type); void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type);
void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx,
void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx); unsigned high_bit_depth);
void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx,
void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx); unsigned high_bit_depth);
void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
#endif /* AVCODEC_DSPUTIL_H */ #endif /* AVCODEC_DSPUTIL_H */
...@@ -926,10 +926,9 @@ static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst, ...@@ -926,10 +926,9 @@ static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
return score; return score;
} }
av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec; c->pix_abs[0][2] = sad16_y2_altivec;
c->pix_abs[0][3] = sad16_xy2_altivec; c->pix_abs[0][3] = sad16_xy2_altivec;
......
...@@ -38,7 +38,8 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, ...@@ -38,7 +38,8 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx);
#endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
...@@ -125,10 +125,9 @@ static long check_dcbzl_effect(void) ...@@ -125,10 +125,9 @@ static long check_dcbzl_effect(void)
return count; return count;
} }
av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
// common optimizations whether AltiVec is available or not // common optimizations whether AltiVec is available or not
if (!high_bit_depth) { if (!high_bit_depth) {
switch (check_dcbzl_effect()) { switch (check_dcbzl_effect()) {
...@@ -144,19 +143,17 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) ...@@ -144,19 +143,17 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
} }
if (PPC_ALTIVEC(av_get_cpu_flags())) { if (PPC_ALTIVEC(av_get_cpu_flags())) {
ff_dsputil_init_altivec(c, avctx); ff_dsputil_init_altivec(c, avctx, high_bit_depth);
ff_int_init_altivec(c, avctx); ff_int_init_altivec(c, avctx);
c->gmc1 = ff_gmc1_altivec; c->gmc1 = ff_gmc1_altivec;
if (!high_bit_depth) {
#if CONFIG_ENCODERS #if CONFIG_ENCODERS
if (avctx->bits_per_raw_sample <= 8 && if (avctx->dct_algo == FF_DCT_AUTO ||
(avctx->dct_algo == FF_DCT_AUTO || avctx->dct_algo == FF_DCT_ALTIVEC) {
avctx->dct_algo == FF_DCT_ALTIVEC)) { c->fdct = ff_fdct_altivec;
c->fdct = ff_fdct_altivec; }
}
#endif //CONFIG_ENCODERS #endif //CONFIG_ENCODERS
if (avctx->bits_per_raw_sample <= 8) {
if ((avctx->idct_algo == FF_IDCT_AUTO) || if ((avctx->idct_algo == FF_IDCT_AUTO) ||
(avctx->idct_algo == FF_IDCT_ALTIVEC)) { (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
c->idct_put = ff_idct_put_altivec; c->idct_put = ff_idct_put_altivec;
......
...@@ -518,11 +518,9 @@ do { \ ...@@ -518,11 +518,9 @@ do { \
} while (0) } while (0)
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
...@@ -559,11 +557,9 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, ...@@ -559,11 +557,9 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
} }
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
c->idct_put = ff_idct_xvid_mmxext_put; c->idct_put = ff_idct_xvid_mmxext_put;
c->idct_add = ff_idct_xvid_mmxext_add; c->idct_add = ff_idct_xvid_mmxext_add;
...@@ -590,11 +586,9 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, ...@@ -590,11 +586,9 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
} }
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_SSE_INLINE #if HAVE_SSE_INLINE
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->vector_clipf = ff_vector_clipf_sse; c->vector_clipf = ff_vector_clipf_sse;
#if FF_API_XVMC #if FF_API_XVMC
...@@ -613,11 +607,9 @@ FF_ENABLE_DEPRECATION_WARNINGS ...@@ -613,11 +607,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
} }
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_SSE2_INLINE #if HAVE_SSE2_INLINE
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
c->idct_put = ff_idct_xvid_sse2_put; c->idct_put = ff_idct_xvid_sse2_put;
c->idct_add = ff_idct_xvid_sse2_add; c->idct_add = ff_idct_xvid_sse2_add;
...@@ -639,7 +631,7 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, ...@@ -639,7 +631,7 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
} }
static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_SSSE3_EXTERNAL #if HAVE_SSSE3_EXTERNAL
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
...@@ -653,14 +645,15 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, ...@@ -653,14 +645,15 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
} }
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
int cpu_flags) int cpu_flags, unsigned high_bit_depth)
{ {
#if HAVE_SSE4_EXTERNAL #if HAVE_SSE4_EXTERNAL
c->vector_clip_int32 = ff_vector_clip_int32_sse4; c->vector_clip_int32 = ff_vector_clip_int32_sse4;
#endif /* HAVE_SSE4_EXTERNAL */ #endif /* HAVE_SSE4_EXTERNAL */
} }
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
...@@ -670,23 +663,23 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) ...@@ -670,23 +663,23 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
#endif #endif
if (X86_MMX(cpu_flags)) if (X86_MMX(cpu_flags))
dsputil_init_mmx(c, avctx, cpu_flags); dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
if (X86_MMXEXT(cpu_flags)) if (X86_MMXEXT(cpu_flags))
dsputil_init_mmxext(c, avctx, cpu_flags); dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
if (X86_SSE(cpu_flags)) if (X86_SSE(cpu_flags))
dsputil_init_sse(c, avctx, cpu_flags); dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth);
if (X86_SSE2(cpu_flags)) if (X86_SSE2(cpu_flags))
dsputil_init_sse2(c, avctx, cpu_flags); dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);
if (EXTERNAL_SSSE3(cpu_flags)) if (EXTERNAL_SSSE3(cpu_flags))
dsputil_init_ssse3(c, avctx, cpu_flags); dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);
if (EXTERNAL_SSE4(cpu_flags)) if (EXTERNAL_SSE4(cpu_flags))
dsputil_init_sse4(c, avctx, cpu_flags); dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);
if (CONFIG_ENCODERS) if (CONFIG_ENCODERS)
ff_dsputilenc_init_mmx(c, avctx); ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
} }
...@@ -104,7 +104,8 @@ ...@@ -104,7 +104,8 @@
"psubb "#regb", "#regr" \n\t" \ "psubb "#regb", "#regr" \n\t" \
"psubb "#regd", "#regp" \n\t" "psubb "#regd", "#regp" \n\t"
void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx); void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx);
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
......
...@@ -986,16 +986,15 @@ hadamard_func(mmxext) ...@@ -986,16 +986,15 @@ hadamard_func(mmxext)
hadamard_func(sse2) hadamard_func(sse2)
hadamard_func(ssse3) hadamard_func(ssse3)
av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
const int dct_algo = avctx->dct_algo; const int dct_algo = avctx->dct_algo;
#if HAVE_YASM #if HAVE_YASM
int bit_depth = avctx->bits_per_raw_sample;
if (EXTERNAL_MMX(cpu_flags)) { if (EXTERNAL_MMX(cpu_flags)) {
if (bit_depth <= 8) if (!high_bit_depth)
c->get_pixels = ff_get_pixels_mmx; c->get_pixels = ff_get_pixels_mmx;
c->diff_pixels = ff_diff_pixels_mmx; c->diff_pixels = ff_diff_pixels_mmx;
c->pix_sum = ff_pix_sum16_mmx; c->pix_sum = ff_pix_sum16_mmx;
...@@ -1003,13 +1002,13 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) ...@@ -1003,13 +1002,13 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
} }
if (EXTERNAL_SSE2(cpu_flags)) if (EXTERNAL_SSE2(cpu_flags))
if (bit_depth <= 8) if (!high_bit_depth)
c->get_pixels = ff_get_pixels_sse2; c->get_pixels = ff_get_pixels_sse2;
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
if (INLINE_MMX(cpu_flags)) { if (INLINE_MMX(cpu_flags)) {
if (avctx->bits_per_raw_sample <= 8 && if (!high_bit_depth &&
(dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
c->fdct = ff_fdct_mmx; c->fdct = ff_fdct_mmx;
...@@ -1039,7 +1038,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) ...@@ -1039,7 +1038,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
} }
if (INLINE_MMXEXT(cpu_flags)) { if (INLINE_MMXEXT(cpu_flags)) {
if (avctx->bits_per_raw_sample <= 8 && if (!high_bit_depth &&
(dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
c->fdct = ff_fdct_mmxext; c->fdct = ff_fdct_mmxext;
...@@ -1054,7 +1053,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) ...@@ -1054,7 +1053,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
} }
if (INLINE_SSE2(cpu_flags)) { if (INLINE_SSE2(cpu_flags)) {
if (avctx->bits_per_raw_sample <= 8 && if (!high_bit_depth &&
(dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX))
c->fdct = ff_fdct_sse2; c->fdct = ff_fdct_sse2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment