Commit 2ed00820 authored by Ronald S. Bultje, committed by Martin Storsjö

h264: Add add_pixels4/8() to h264dsp, and remove add_pixels4 from dsputil

These functions are mostly H264-specific (the only other user I can
spot is bink), and this allows us to special-case some functionality
for H264. Also remove the 16-bit-coeff with >8bpp versions (unused)
and merge the duplicate 32-bit-coeff for >8bpp (identical).
Signed-off-by: Martin Storsjö <martin@martin.st>
parent e5ffffe4
...@@ -403,6 +403,26 @@ static void put_signed_pixels_clamped_c(const int16_t *block, ...@@ -403,6 +403,26 @@ static void put_signed_pixels_clamped_c(const int16_t *block,
} }
} }
/**
 * Add an 8x8 block of 16-bit coefficients onto an 8x8 area of 8-bit pixels.
 *
 * Each sum is stored back into a uint8_t without clamping, so the result
 * wraps modulo 256.
 *
 * @param pixels    top-left pixel of the destination area; updated in place
 * @param block     64 coefficients stored row by row, 8 per row
 * @param line_size offset in bytes from one pixel row to the next
 */
static void add_pixels8_c(uint8_t *restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* one row: eight neighbouring pixels get eight neighbouring coefficients */
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];

        pixels += line_size;
        block  += 8;
    }
}
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels, static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
int line_size) int line_size)
{ {
...@@ -2678,6 +2698,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -2678,6 +2698,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->shrink[2]= ff_shrink44; c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88; c->shrink[3]= ff_shrink88;
c->add_pixels8 = add_pixels8_c;
#define hpel_funcs(prefix, idx, num) \ #define hpel_funcs(prefix, idx, num) \
c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \ c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \ c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
...@@ -2706,8 +2728,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -2706,8 +2728,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->draw_edges = FUNCC(draw_edges , depth);\ c->draw_edges = FUNCC(draw_edges , depth);\
c->clear_block = FUNCC(clear_block ## dct , depth);\ c->clear_block = FUNCC(clear_block ## dct , depth);\
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
switch (avctx->bits_per_raw_sample) { switch (avctx->bits_per_raw_sample) {
case 9: case 9:
......
...@@ -153,7 +153,6 @@ typedef struct DSPContext { ...@@ -153,7 +153,6 @@ typedef struct DSPContext {
void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size); void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size);
void (*add_pixels4)(uint8_t *pixels, int16_t *block, int line_size);
int (*sum_abs_dctelem)(int16_t *block/*align 16*/); int (*sum_abs_dctelem)(int16_t *block/*align 16*/);
/** /**
* translational global motion compensation. * translational global motion compensation.
......
...@@ -89,48 +89,6 @@ static void FUNCC(get_pixels ## suffix)(int16_t *restrict _block, \ ...@@ -89,48 +89,6 @@ static void FUNCC(get_pixels ## suffix)(int16_t *restrict _block, \
} \ } \
} \ } \
\ \
static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
int16_t *_block, \
int line_size) \
{ \
int i; \
pixel *restrict pixels = (pixel *restrict)_pixels; \
dctcoef *block = (dctcoef*)_block; \
line_size /= sizeof(pixel); \
\
for(i=0;i<8;i++) { \
pixels[0] += block[0]; \
pixels[1] += block[1]; \
pixels[2] += block[2]; \
pixels[3] += block[3]; \
pixels[4] += block[4]; \
pixels[5] += block[5]; \
pixels[6] += block[6]; \
pixels[7] += block[7]; \
pixels += line_size; \
block += 8; \
} \
} \
\
static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
int16_t *_block, \
int line_size) \
{ \
int i; \
pixel *restrict pixels = (pixel *restrict)_pixels; \
dctcoef *block = (dctcoef*)_block; \
line_size /= sizeof(pixel); \
\
for(i=0;i<4;i++) { \
pixels[0] += block[0]; \
pixels[1] += block[1]; \
pixels[2] += block[2]; \
pixels[3] += block[3]; \
pixels += line_size; \
block += 4; \
} \
} \
\
static void FUNCC(clear_block ## suffix)(int16_t *block) \ static void FUNCC(clear_block ## suffix)(int16_t *block) \
{ \ { \
memset(block, 0, sizeof(dctcoef)*64); \ memset(block, 0, sizeof(dctcoef)*64); \
......
...@@ -2116,7 +2116,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, ...@@ -2116,7 +2116,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
if (IS_8x8DCT(mb_type)) { if (IS_8x8DCT(mb_type)) {
if (transform_bypass) { if (transform_bypass) {
idct_dc_add = idct_dc_add =
idct_add = h->dsp.add_pixels8; idct_add = h->h264dsp.h264_add_pixels8;
} else { } else {
idct_dc_add = h->h264dsp.h264_idct8_dc_add; idct_dc_add = h->h264dsp.h264_idct8_dc_add;
idct_add = h->h264dsp.h264_idct8_add; idct_add = h->h264dsp.h264_idct8_add;
...@@ -2141,7 +2141,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, ...@@ -2141,7 +2141,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
} else { } else {
if (transform_bypass) { if (transform_bypass) {
idct_dc_add = idct_dc_add =
idct_add = h->dsp.add_pixels4; idct_add = h->h264dsp.h264_add_pixels4;
} else { } else {
idct_dc_add = h->h264dsp.h264_idct_dc_add; idct_dc_add = h->h264dsp.h264_idct_dc_add;
idct_add = h->h264dsp.h264_idct_add; idct_add = h->h264dsp.h264_idct_add;
...@@ -2238,9 +2238,9 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, ...@@ -2238,9 +2238,9 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
for (i = 0; i < 16; i++) for (i = 0; i < 16; i++)
if (h->non_zero_count_cache[scan8[i + p * 16]] || if (h->non_zero_count_cache[scan8[i + p * 16]] ||
dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
h->dsp.add_pixels4(dest_y + block_offset[i], h->h264dsp.h264_add_pixels4(dest_y + block_offset[i],
h->mb + (i * 16 + p * 256 << pixel_shift), h->mb + (i * 16 + p * 256 << pixel_shift),
linesize); linesize);
} }
} else { } else {
h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
...@@ -2251,8 +2251,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, ...@@ -2251,8 +2251,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
} else if (h->cbp & 15) { } else if (h->cbp & 15) {
if (transform_bypass) { if (transform_bypass) {
const int di = IS_8x8DCT(mb_type) ? 4 : 1; const int di = IS_8x8DCT(mb_type) ? 4 : 1;
idct_add = IS_8x8DCT(mb_type) ? h->dsp.add_pixels8 idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8
: h->dsp.add_pixels4; : h->h264dsp.h264_add_pixels4;
for (i = 0; i < 16; i += di) for (i = 0; i < 16; i += di)
if (h->non_zero_count_cache[scan8[i + p * 16]]) if (h->non_zero_count_cache[scan8[i + p * 16]])
idct_add(dest_y + block_offset[i], idct_add(dest_y + block_offset[i],
......
...@@ -207,7 +207,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) ...@@ -207,7 +207,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
h->mb + (16 * 16 * 2 << PIXEL_SHIFT), h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
uvlinesize); uvlinesize);
} else { } else {
idct_add = h->dsp.add_pixels4; idct_add = h->h264dsp.h264_add_pixels4;
for (j = 1; j < 3; j++) { for (j = 1; j < 3; j++) {
for (i = j * 16; i < j * 16 + 4; i++) for (i = j * 16; i < j * 16 + 4; i++)
if (h->non_zero_count_cache[scan8[i]] || if (h->non_zero_count_cache[scan8[i]] ||
......
/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* H.264 / AVC / MPEG4 part10 DSP functions.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "bit_depth_template.c"
/**
 * Add a 4x4 block of coefficients onto a 4x4 area of pixels, without clamping.
 *
 * The byte pointer _dst is accessed as pixel and _src as dctcoef; both
 * types (and FUNCC) are presumably supplied per BIT_DEPTH by
 * bit_depth_template.c -- confirm there.
 *
 * @param _dst   top-left pixel of the destination area; updated in place
 * @param _src   16 coefficients stored row by row, 4 per row
 * @param stride offset in bytes from one destination row to the next
 */
static void FUNCC(ff_h264_add_pixels4)(uint8_t *_dst, int16_t *_src, int stride)
{
    pixel   *out    = (pixel *) _dst;
    dctcoef *coeffs = (dctcoef *) _src;
    /* byte stride converted to a stride counted in pixels */
    const int pitch = stride / sizeof(pixel);
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            out[col] += coeffs[col];

        out    += pitch;
        coeffs += 4;
    }
}
/**
 * Add an 8x8 block of coefficients onto an 8x8 area of pixels, without clamping.
 *
 * The byte pointer _dst is accessed as pixel and _src as dctcoef; both
 * types (and FUNCC) are presumably supplied per BIT_DEPTH by
 * bit_depth_template.c -- confirm there.
 *
 * @param _dst   top-left pixel of the destination area; updated in place
 * @param _src   64 coefficients stored row by row, 8 per row
 * @param stride offset in bytes from one destination row to the next
 */
static void FUNCC(ff_h264_add_pixels8)(uint8_t *_dst, int16_t *_src, int stride)
{
    pixel   *out    = (pixel *) _dst;
    dctcoef *coeffs = (dctcoef *) _src;
    /* byte stride converted to a stride counted in pixels */
    const int pitch = stride / sizeof(pixel);
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            out[col] += coeffs[col];

        out    += pitch;
        coeffs += 8;
    }
}
...@@ -43,11 +43,29 @@ ...@@ -43,11 +43,29 @@
#include "h264dsp_template.c" #include "h264dsp_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
#define BIT_DEPTH 8
#include "h264addpx_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 16
#include "h264addpx_template.c"
#undef BIT_DEPTH
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
#undef FUNC #undef FUNC
#define FUNC(a, depth) a ## _ ## depth ## _c #define FUNC(a, depth) a ## _ ## depth ## _c
#define ADDPX_DSP(depth) \
c->h264_add_pixels4 = FUNC(ff_h264_add_pixels4, depth);\
c->h264_add_pixels8 = FUNC(ff_h264_add_pixels8, depth)
if (bit_depth > 8 && bit_depth <= 16) {
ADDPX_DSP(16);
} else {
ADDPX_DSP(8);
}
#define H264_DSP(depth) \ #define H264_DSP(depth) \
c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\ c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\ c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
......
...@@ -101,6 +101,10 @@ typedef struct H264DSPContext { ...@@ -101,6 +101,10 @@ typedef struct H264DSPContext {
void (*h264_luma_dc_dequant_idct)(int16_t *output, void (*h264_luma_dc_dequant_idct)(int16_t *output,
int16_t *input /*align 16*/, int qmul); int16_t *input /*align 16*/, int qmul);
void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul); void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
/* bypass-transform */
void (*h264_add_pixels8)(uint8_t *dst, int16_t *block, int stride);
void (*h264_add_pixels4)(uint8_t *dst, int16_t *block, int stride);
} H264DSPContext; } H264DSPContext;
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment