Commit 95c89da3 authored by Ronald S. Bultje

Use ptrdiff_t instead of int for intra pred "stride" function parameter.

This way, SIMD-optimized functions don't have to sign-extend their
stride argument manually to be able to do pointer arithmetic.
parent bad8e33d
...@@ -23,25 +23,25 @@ ...@@ -23,25 +23,25 @@
#include "libavutil/arm/cpu.h" #include "libavutil/arm/cpu.h"
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
void ff_pred16x16_vert_neon(uint8_t *src, int stride); void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_hor_neon(uint8_t *src, int stride); void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_plane_neon(uint8_t *src, int stride); void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_dc_neon(uint8_t *src, int stride); void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_128_dc_neon(uint8_t *src, int stride); void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_left_dc_neon(uint8_t *src, int stride); void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_top_dc_neon(uint8_t *src, int stride); void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_vert_neon(uint8_t *src, int stride); void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_hor_neon(uint8_t *src, int stride); void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_plane_neon(uint8_t *src, int stride); void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_dc_neon(uint8_t *src, int stride); void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_128_dc_neon(uint8_t *src, int stride); void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_left_dc_neon(uint8_t *src, int stride); void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_top_dc_neon(uint8_t *src, int stride); void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l0t_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {
......
...@@ -39,7 +39,9 @@ ...@@ -39,7 +39,9 @@
#include "h264pred_template.c" #include "h264pred_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
const unsigned lt = src[-1-1*stride]; const unsigned lt = src[-1-1*stride];
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
...@@ -54,7 +56,9 @@ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int st ...@@ -54,7 +56,9 @@ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int st
AV_WN32A(src+3*stride, v); AV_WN32A(src+3*stride, v);
} }
static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
const unsigned lt = src[-1-1*stride]; const unsigned lt = src[-1-1*stride];
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
...@@ -64,7 +68,9 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int ...@@ -64,7 +68,9 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int
AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101); AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
} }
static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
...@@ -86,7 +92,9 @@ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int ...@@ -86,7 +92,9 @@ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int
src[3+3*stride]=(l3 + t3)>>1; src[3+3*stride]=(l3 + t3)>>1;
} }
static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
...@@ -110,7 +118,10 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int ...@@ -110,7 +118,10 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int
src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
} }
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
...@@ -133,8 +144,11 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh ...@@ -133,8 +144,11 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh
src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
} }
static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride, static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
const int l0, const int l1, const int l2, const int l3, const int l4){ ptrdiff_t stride,
const int l0, const int l1, const int l2,
const int l3, const int l4)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
...@@ -156,20 +170,27 @@ static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, in ...@@ -156,20 +170,27 @@ static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, in
src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
} }
static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_DOWN_LEFT_EDGE LOAD_DOWN_LEFT_EDGE
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4); pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
} }
static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3); pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
} }
static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
...@@ -191,7 +212,9 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i ...@@ -191,7 +212,9 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i
src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2; src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
} }
static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_DOWN_LEFT_EDGE LOAD_DOWN_LEFT_EDGE
LOAD_TOP_EDGE LOAD_TOP_EDGE
...@@ -215,7 +238,10 @@ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, ...@@ -215,7 +238,10 @@ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
} }
static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
...@@ -238,7 +264,9 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top ...@@ -238,7 +264,9 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top
src[3+3*stride]=l3; src[3+3*stride]=l3;
} }
static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
...@@ -253,15 +281,18 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ ...@@ -253,15 +281,18 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
} }
} }
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){ static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
{
pred16x16_plane_compat_8_c(src, stride, 1, 0); pred16x16_plane_compat_8_c(src, stride, 1, 0);
} }
static void pred16x16_plane_rv40_c(uint8_t *src, int stride){ static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
{
pred16x16_plane_compat_8_c(src, stride, 0, 1); pred16x16_plane_compat_8_c(src, stride, 0, 1);
} }
static void pred16x16_tm_vp8_c(uint8_t *src, int stride){ static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
...@@ -288,7 +319,8 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){ ...@@ -288,7 +319,8 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0; unsigned dc0;
...@@ -303,7 +335,8 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){ ...@@ -303,7 +335,8 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0; unsigned dc0;
...@@ -318,7 +351,8 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){ ...@@ -318,7 +351,8 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0 = 0; unsigned dc0 = 0;
...@@ -339,7 +373,8 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ ...@@ -339,7 +373,8 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
...@@ -361,7 +396,9 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ ...@@ -361,7 +396,9 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
/** /**
* Set the intra prediction function pointers. * Set the intra prediction function pointers.
*/ */
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth,
const int chroma_format_idc)
{
// MpegEncContext * const s = &h->s; // MpegEncContext * const s = &h->s;
#undef FUNC #undef FUNC
......
...@@ -90,21 +90,23 @@ ...@@ -90,21 +90,23 @@
* Context for storing H.264 prediction functions * Context for storing H.264 prediction functions
*/ */
typedef struct H264PredContext { typedef struct H264PredContext {
void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright,
void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); ptrdiff_t stride);
void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright,
void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); ptrdiff_t stride);
void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride);
void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride);
void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, void(*pred4x4_add[2])(uint8_t *pix /*align 4*/,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, void(*pred8x8_add[3])(uint8_t *pix /*align 8*/,
const int *block_offset, const int *block_offset,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
const int *block_offset, const int *block_offset,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
} H264PredContext; } H264PredContext;
void ff_h264_pred_init(H264PredContext *h, int codec_id, void ff_h264_pred_init(H264PredContext *h, int codec_id,
......
This diff is collapsed.
...@@ -23,7 +23,9 @@ ...@@ -23,7 +23,9 @@
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
#define PRED4x4(TYPE, DEPTH, OPT) \ #define PRED4x4(TYPE, DEPTH, OPT) \
void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
const uint8_t *topright, \
ptrdiff_t stride);
PRED4x4(dc, 10, mmx2) PRED4x4(dc, 10, mmx2)
PRED4x4(down_left, 10, sse2) PRED4x4(down_left, 10, sse2)
...@@ -42,7 +44,8 @@ PRED4x4(horizontal_down, 10, ssse3) ...@@ -42,7 +44,8 @@ PRED4x4(horizontal_down, 10, ssse3)
PRED4x4(horizontal_down, 10, avx) PRED4x4(horizontal_down, 10, avx)
#define PRED8x8(TYPE, DEPTH, OPT) \ #define PRED8x8(TYPE, DEPTH, OPT) \
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED8x8(dc, 10, mmx2) PRED8x8(dc, 10, mmx2)
PRED8x8(dc, 10, sse2) PRED8x8(dc, 10, sse2)
...@@ -52,7 +55,10 @@ PRED8x8(vertical, 10, sse2) ...@@ -52,7 +55,10 @@ PRED8x8(vertical, 10, sse2)
PRED8x8(horizontal, 10, sse2) PRED8x8(horizontal, 10, sse2)
#define PRED8x8L(TYPE, DEPTH, OPT)\ #define PRED8x8L(TYPE, DEPTH, OPT)\
void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
int has_topleft, \
int has_topright, \
ptrdiff_t stride);
PRED8x8L(dc, 10, sse2) PRED8x8L(dc, 10, sse2)
PRED8x8L(dc, 10, avx) PRED8x8L(dc, 10, avx)
...@@ -79,7 +85,8 @@ PRED8x8L(horizontal_up, 10, ssse3) ...@@ -79,7 +85,8 @@ PRED8x8L(horizontal_up, 10, ssse3)
PRED8x8L(horizontal_up, 10, avx) PRED8x8L(horizontal_up, 10, avx)
#define PRED16x16(TYPE, DEPTH, OPT)\ #define PRED16x16(TYPE, DEPTH, OPT)\
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED16x16(dc, 10, mmx2) PRED16x16(dc, 10, mmx2)
PRED16x16(dc, 10, sse2) PRED16x16(dc, 10, sse2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment