Commit 19a0729b authored by Oskar Arvidsson's avatar Oskar Arvidsson Committed by Ronald S. Bultje

Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder.

This patch lets e.g. dsputil_init chose dsp functions with respect to
the bit depth to decode. The naming scheme of bit depth dependent
functions is <base name>_<bit depth>[_<prefix>] (i.e. the old
clear_blocks_c is now named clear_blocks_8_c).

Note: Some of the functions for high bit depth is not dependent on the
bit depth, but only on the pixel size. This leaves some room for
optimizing binary size.

Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: 's avatarRonald S. Bultje <rsbultje@gmail.com>
parent fcc0224e
......@@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
......@@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
c->clear_blocks = clear_blocks_axp;
}
/* amask clears all bits that correspond to present features. */
if (amask(AMASK_MVI) == 0) {
......
......@@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;
......@@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
......@@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
}
if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx);
......
......@@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
c->idct_put = ff_simple_idct_put_armv6;
......@@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
}
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
......@@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
}
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
c->get_pixels = ff_get_pixels_armv6;
......
......@@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!avctx->lowres) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLENEON) {
......@@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
}
}
if (!high_bit_depth) {
c->clear_block = ff_clear_block_neon;
c->clear_blocks = ff_clear_blocks_neon;
......@@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
}
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
if (CONFIG_H264_DECODER) {
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
......@@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
}
}
if (CONFIG_VP3_DECODER) {
......
......@@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
......@@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
if (!high_bit_depth) {
c->clear_blocks = clear_blocks_iwmmxt;
c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
......@@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
}
}
......@@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride,
const uint8_t nnzc[6*8]);
static void ff_h264dsp_init_neon(H264DSPContext *c)
static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
{
if (bit_depth == 8) {
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
......@@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c)
c->h264_idct8_add = ff_h264_idct8_add_neon;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
}
}
void ff_h264dsp_init_arm(H264DSPContext *c)
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
{
if (HAVE_NEON) ff_h264dsp_init_neon(c);
if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
}
......@@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
{
const int high_depth = bit_depth > 8;
if (high_depth)
return;
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon;
if (codec_id != CODEC_ID_VP8)
......@@ -69,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
}
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id)
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, bit_depth)
{
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id);
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth);
}
......@@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->get_pixels = ff_bfin_get_pixels;
c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
if (!high_bit_depth)
c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum;
c->pix_norm1 = ff_bfin_pix_norm1;
......@@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
c->sse[1] = ff_bfin_sse8;
c->sse[2] = ff_bfin_sse4;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = bfin_put_pixels16;
c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
......@@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
}
if (avctx->dct_algo == FF_DCT_AUTO)
c->fdct = ff_bfin_fdct;
......
This diff is collapsed.
......@@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block);
void ff_fdct_mmx2(DCTELEM *block);
void ff_fdct_sse2(DCTELEM *block);
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
#define H264_IDCT(depth) \
void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
H264_IDCT( 8)
H264_IDCT( 9)
H264_IDCT(10)
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
......@@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64];
extern uint32_t ff_squareTbl[512];
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
#define PUTAVG_PIXELS(depth)\
void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
PUTAVG_PIXELS( 8)
PUTAVG_PIXELS( 9)
PUTAVG_PIXELS(10)
#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
/* VP3 DSP functions */
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
......@@ -187,10 +202,17 @@ typedef struct ScanTable{
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
int block_w, int block_h,
#define EMULATED_EDGE(depth) \
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
int block_w, int block_h,\
int src_x, int src_y, int w, int h);
EMULATED_EDGE(8)
EMULATED_EDGE(9)
EMULATED_EDGE(10)
#define ff_emulated_edge_mc ff_emulated_edge_mc_8
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
......@@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
#define BYTE_VEC32(c) ((c)*0x01010101UL)
#define BYTE_VEC64(c) ((c)*0x0001000100010001UL)
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
......@@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}
static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
{
return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
}
static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
{
return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
}
static inline int get_penalty_factor(int lambda, int lambda2, int type){
switch(type&0xFF){
default:
......
......@@ -27,25 +27,55 @@
* DSP utils
*/
#include "dsputil.h"
#include "high_bit_depth.h"
#define BIT_DEPTH 8
static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN2P(dst , AV_RN2P(src ));
dst+=dstStride;
src+=srcStride;
}
}
#define pixel uint8_t
#define pixel2 uint16_t
#define pixel4 uint32_t
#define dctcoef int16_t
static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN4P(dst , AV_RN4P(src ));
dst+=dstStride;
src+=srcStride;
}
}
#define FUNC(a) a
#define FUNCC(a) a ## _c
#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
#define CLIP(a) cm[a]
#define AV_RN2P AV_RN16
#define AV_RN4P AV_RN32
#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN4P(dst , AV_RN4P(src ));
AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
dst+=dstStride;
src+=srcStride;
}
}
#define no_rnd_avg_pixel4 no_rnd_avg32
#define rnd_avg_pixel4 rnd_avg32
static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN4P(dst , AV_RN4P(src ));
AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
dst+=dstStride;
src+=srcStride;
}
}
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
......@@ -1317,10 +1347,22 @@ H264_MC(avg_, 16)
#undef op2_avg
#undef op2_put
#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
#if BIT_DEPTH == 8
# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
#elif BIT_DEPTH == 9
# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
#elif BIT_DEPTH == 10
# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
#endif
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
FUNCC(put_pixels8)(dst, src, stride, 8);
......
......@@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
dst->list_counts = src->list_counts;
dst->s.obmc_scratchpad = NULL;
ff_h264_pred_init(&dst->hpc, src->s.codec_id);
ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
}
/**
......@@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){
s->height = s->avctx->height;
s->codec_id= s->avctx->codec->id;
ff_h264dsp_init(&h->h264dsp);
ff_h264_pred_init(&h->hpc, s->codec_id);
ff_h264dsp_init(&h->h264dsp, 8);
ff_h264_pred_init(&h->hpc, s->codec_id, 8);
h->dequant_coeff_pps= -1;
s->unrestricted_mv=1;
......@@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
ff_h264_decode_init_vlc();
h->pixel_shift = 0;
h->sps.bit_depth_luma = 8;
h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
h->thread_context[0] = h;
h->outputed_poc = INT_MIN;
......@@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if(avctx->has_b_frames < 2)
avctx->has_b_frames= !s->low_delay;
if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
h->pixel_shift = h->sps.bit_depth_luma > 8;
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
dsputil_init(&s->dsp, s->avctx);
} else {
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
return -1;
}
}
break;
case NAL_PPS:
init_get_bits(&s->gb, ptr, bit_length);
......
......@@ -29,57 +29,83 @@
#include "avcodec.h"
#include "h264dsp.h"
#define BIT_DEPTH 8
#include "h264dsp_template.c"
#undef BIT_DEPTH
void ff_h264dsp_init(H264DSPContext *c)
{
c->h264_idct_add= ff_h264_idct_add_c;
c->h264_idct8_add= ff_h264_idct8_add_c;
c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
c->h264_idct_add16 = ff_h264_idct_add16_c;
c->h264_idct8_add4 = ff_h264_idct8_add4_c;
c->h264_idct_add8 = ff_h264_idct_add8_c;
c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
#define BIT_DEPTH 9
#include "h264dsp_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
#include "h264dsp_template.c"
#undef BIT_DEPTH
c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth)
{
#undef FUNC
#define FUNC(a, depth) a ## _ ## depth ## _c
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
#define H264_DSP(depth) \
c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\
c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\
c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
\
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
\
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
c->h264_loop_filter_strength= NULL;
if (ARCH_ARM) ff_h264dsp_init_arm(c);
if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c);
if (HAVE_MMX) ff_h264dsp_init_x86(c);
switch (bit_depth) {
case 9:
H264_DSP(9);
break;
case 10:
H264_DSP(10);
break;
default:
H264_DSP(8);
break;
}
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth);
if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
}
......@@ -75,9 +75,9 @@ typedef struct H264DSPContext{
void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
}H264DSPContext;
void ff_h264dsp_init(H264DSPContext *c);
void ff_h264dsp_init_arm(H264DSPContext *c);
void ff_h264dsp_init_ppc(H264DSPContext *c);
void ff_h264dsp_init_x86(H264DSPContext *c);
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
#endif /* AVCODEC_H264DSP_H */
......@@ -25,10 +25,7 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#define BIT_DEPTH 8
#define pixel uint8_t
#define av_clip_pixel av_clip_uint8
#define FUNCC(a) a ## _c
#include "high_bit_depth.h"
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
......
......@@ -25,4 +25,14 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#define BIT_DEPTH 8
#include "h264idct_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 9
#include "h264idct_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 10
#include "h264idct_template.c"
#undef BIT_DEPTH
......@@ -25,7 +25,7 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "dsputil.h"
#include "high_bit_depth.h"
#ifndef AVCODEC_H264IDCT_INTERNAL_H
#define AVCODEC_H264IDCT_INTERNAL_H
......@@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={
};
#endif
#define pixel uint8_t
#define dctcoef DCTELEM
#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
#define CLIP(a) cm[a]
#define FUNCC(a) a ## _c
static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
int i;
INIT_CLIP
......
This diff is collapsed.
......@@ -101,8 +101,8 @@ typedef struct H264PredContext{
void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
}H264PredContext;
void ff_h264_pred_init(H264PredContext *h, int codec_id);
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
#endif /* AVCODEC_H264PRED_H */
......@@ -26,21 +26,7 @@
*/
#include "mathops.h"
#include "dsputil.h"
#define BIT_DEPTH 8
#define pixel uint8_t
#define pixel4 uint32_t
#define dctcoef DCTELEM
#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
#define CLIP(a) cm[a]
#define FUNC(a) a
#define FUNCC(a) a ## _c
#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#define AV_WN4P AV_WN32
#define AV_WN4PA AV_WN32A
#include "high_bit_depth.h"
static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
pixel *src = (pixel*)_src;
......
#include "dsputil.h"
#ifndef BIT_DEPTH
#define BIT_DEPTH 8
#endif
#ifdef AVCODEC_H264_HIGH_DEPTH_H
# undef pixel
# undef pixel2
# undef pixel4
# undef dctcoef
# undef INIT_CLIP
# undef no_rnd_avg_pixel4
# undef rnd_avg_pixel4
# undef AV_RN2P
# undef AV_RN4P
# undef AV_WN2P
# undef AV_WN4P
# undef AV_WN4PA
# undef CLIP
# undef FUNC
# undef FUNCC
# undef av_clip_pixel
# undef PIXEL_SPLAT_X4
#else
# define AVCODEC_H264_HIGH_DEPTH_H
# define CLIP_PIXEL(depth)\
static inline uint16_t av_clip_pixel_ ## depth (int p)\
{\
const int pixel_max = (1 << depth)-1;\
return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
}
CLIP_PIXEL( 9)
CLIP_PIXEL(10)
#endif
#if BIT_DEPTH > 8
# define pixel uint16_t
# define pixel2 uint32_t
# define pixel4 uint64_t
# define dctcoef int32_t
# define INIT_CLIP
# define no_rnd_avg_pixel4 no_rnd_avg64
# define rnd_avg_pixel4 rnd_avg64
# define AV_RN2P AV_RN32
# define AV_RN4P AV_RN64
# define AV_WN2P AV_WN32
# define AV_WN4P AV_WN64
# define AV_WN4PA AV_WN64A
# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#else
# define pixel uint8_t
# define pixel2 uint16_t
# define pixel4 uint32_t
# define dctcoef int16_t
# define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
# define no_rnd_avg_pixel4 no_rnd_avg32
# define rnd_avg_pixel4 rnd_avg32
# define AV_RN2P AV_RN16
# define AV_RN4P AV_RN32
# define AV_WN2P AV_WN16
# define AV_WN4P AV_WN32
# define AV_WN4PA AV_WN32A
# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#endif
#if BIT_DEPTH == 8
# define av_clip_pixel(a) av_clip_uint8(a)
# define CLIP(a) cm[a]
# define FUNC(a) a ## _8
# define FUNCC(a) a ## _8_c
#elif BIT_DEPTH == 9
# define av_clip_pixel(a) av_clip_pixel_9(a)
# define CLIP(a) av_clip_pixel_9(a)
# define FUNC(a) a ## _9
# define FUNCC(a) a ## _9_c
#elif BIT_DEPTH == 10
# define av_clip_pixel(a) av_clip_pixel_10(a)
# define CLIP(a) av_clip_pixel_10(a)
# define FUNC(a) a ## _10
# define FUNCC(a) a ## _10_c
#endif
......@@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->get_pixels = get_pixels_mlib;
c->diff_pixels = diff_pixels_mlib;
c->add_pixels_clamped = add_pixels_clamped_mlib;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_mlib;
c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
......@@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
}
c->bswap_buf = bswap_buf_mlib;
}
......
......@@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec;
c->pix_abs[0][3] = sad16_xy2_altivec;
......@@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
if (!high_bit_depth)
c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
......@@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
}
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
......
......@@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
// Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
if (!high_bit_depth) {
switch (check_dcbzl_effect()) {
case 32:
c->clear_blocks = clear_blocks_dcbz32_ppc;
......@@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
default:
break;
}
}
#if HAVE_ALTIVEC
if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
......
......@@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
......@@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc
}
}
}
void ff_h264dsp_init_ppc(H264DSPContext *c)
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
{
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
if (bit_depth == 8) {
c->h264_idct_add = ff_h264_idct_add_altivec;
c->h264_idct_add8 = ff_h264_idct_add8_altivec;
c->h264_idct_add16 = ff_h264_idct_add16_altivec;
......@@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c)
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
}
}
}
......@@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->clear_blocks = clear_blocks_mmi;
c->put_pixels_tab[1][0] = put_pixels8_mmi;
......@@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_tab[0][0] = put_pixels16_mmi;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
}
c->get_pixels = get_pixels_mmi;
......
......@@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0)
return -1;
ff_h264_pred_init(&r->h, CODEC_ID_RV40);
ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
r->intra_types_stride = 4*s->mb_stride + 4;
r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
......
......@@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
......@@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
}
#ifdef QPEL
......@@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_qpel, 1, 8);
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
if (!high_bit_depth) {
dspfunc(put_h264_qpel, 0, 16);
dspfunc(put_h264_qpel, 1, 8);
dspfunc(put_h264_qpel, 2, 4);
dspfunc(avg_h264_qpel, 0, 16);
dspfunc(avg_h264_qpel, 1, 8);
dspfunc(avg_h264_qpel, 2, 4);
}
#undef dspfunc
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
}
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
......
......@@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
dsputil_init_align(c,avctx);
if (!high_bit_depth)
c->clear_blocks = clear_blocks_sh4;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
c->idct_put = idct_put;
......
......@@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
{
/* VIS-specific optimizations */
int accel = vis_level ();
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (accel & ACCEL_SPARC_VIS) {
if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
......@@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
}
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = MC_put_o_16_vis;
c->put_pixels_tab[0][1] = MC_put_x_16_vis;
c->put_pixels_tab[0][2] = MC_put_y_16_vis;
......@@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
}
}
}
......@@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
avctx->pix_fmt = PIX_FMT_YUV420P;
dsputil_init(&s->dsp, avctx);
ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
ff_vp8dsp_init(&s->vp8dsp);
return 0;
......
......@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
......@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth) {
c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
if ((mm_flags & AV_CPU_FLAG_SSE) &&
......@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->clear_block = clear_block_sse;
c->clear_blocks = clear_blocks_sse;
}
}
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
......@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
if (!high_bit_depth) {
SET_HPEL_FUNCS(put, 0, 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
SET_HPEL_FUNCS(avg, 0, 16, mmx);
......@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
SET_HPEL_FUNCS(avg, 1, 8, mmx);
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
}
#if ARCH_X86_32 || !HAVE_YASM
c->gmc= gmc_mmx;
#endif
#if ARCH_X86_32 && HAVE_YASM
if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_mmx;
#endif
c->add_bytes= add_bytes_mmx;
c->add_bytes_l2= add_bytes_l2_mmx;
if (!high_bit_depth)
c->draw_edges = draw_edges_mmx;
if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
......@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_YASM
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
}
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
......@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->prefetch = prefetch_mmx2;
if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
......@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
}
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
if (!high_bit_depth) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
}
if (CONFIG_VP3_DECODER && HAVE_YASM) {
c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
......@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
}
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
......@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
}
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
#endif
......@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
......@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
}
if (CONFIG_VP3_DECODER
&& (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
......@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
}
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
......@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
#if HAVE_YASM
if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
}
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
......@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
}
}
if(mm_flags & AV_CPU_FLAG_SSE2){
if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
......@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
}
}
#if HAVE_SSSE3
if(mm_flags & AV_CPU_FLAG_SSSE3){
if (!high_bit_depth) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
......@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
}
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
}
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
......@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
c->gmc= gmc_sse;
#endif
......
......@@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s
void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
{
int mm_flags = av_get_cpu_flags();
const int high_depth = bit_depth > 8;
if (high_depth)
return;
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
......
......@@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX ( 4, 8)
H264_BIWEIGHT_MMX ( 4, 4)
H264_BIWEIGHT_MMX ( 4, 2)
void ff_h264dsp_init_x86(H264DSPContext *c)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
{
int mm_flags = av_get_cpu_flags();
if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
}
......@@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
}
}
}
}
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment