Commit a8b60158 authored by Luca Barbato

dsputil: convert remaining functions to use ptrdiff_t strides

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
parent a4472ac0
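
The conversion is mechanical: every remaining dsputil motion-compensation entry point now takes its stride as ptrdiff_t instead of int, matching the type used for pointer arithmetic on plane buffers (and already used by op_pixels_func). In the diff below only the declared type of the stride parameter changes, and where a definition is touched the opening brace moves onto its own line. A minimal, self-contained sketch of the idea; copy_block_8xh and the buffers in main() are made up for illustration and are not functions from this commit:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper, not part of the commit: copies an 8-wide block of
     * h rows, stepping both pointers by a signed stride. Carrying the stride
     * as ptrdiff_t keeps "dst += stride" exact pointer arithmetic even for
     * negative strides or very large planes. */
    static void copy_block_8xh(uint8_t *dst, const uint8_t *src,
                               ptrdiff_t stride, int h)
    {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < 8; x++)
                dst[x] = src[x];
            dst += stride;
            src += stride;
        }
    }

    int main(void)
    {
        uint8_t src[8 * 4], dst[8 * 4];
        for (int i = 0; i < 8 * 4; i++)
            src[i] = (uint8_t)i;
        /* Walk the 4 rows bottom-up by starting at the last row and using a
         * negative stride; the result is still a straight copy of src. */
        copy_block_8xh(dst + 8 * 3, src + 8 * 3, -8, 4);
        printf("dst[0] = %d\n", dst[0]);
        return 0;
    }
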
@@ -26,73 +26,73 @@
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/h264qpel.h"

-void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t);

-void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t);

-void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t);

-void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, ptrdiff_t);
-void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
+void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, ptrdiff_t);

 av_cold void ff_h264qpel_init_arm(H264QpelContext *c, int bit_depth)
 {
@@ -27,7 +27,7 @@
 #define DECL_QPEL3(type, w, pos) \
 void ff_##type##_rv40_qpel##w##_mc##pos##_neon(uint8_t *dst, uint8_t *src,\
-int stride)
+ptrdiff_t stride)
 #define DECL_QPEL2(w, pos) \
 DECL_QPEL3(put, w, pos); \
 DECL_QPEL3(avg, w, pos)
@@ -421,63 +421,78 @@ static void OPNAME ## cavs_filt16_hv_ ## NAME(uint8_t *dst, uint8_t *src1, uint8
 }\
 #define CAVS_MC(OPNAME, SIZE) \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _h_qpel_l(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _h_hpel(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _h_qpel_r(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _v_qpel_l(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _v_hpel(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _v_qpel_r(dst, src, stride, stride);\
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_jj(dst, src, NULL, stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src, stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride, stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+1, stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_egpr(dst, src, src+stride+1,stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_ff(dst, src, src+stride+1,stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_ii(dst, src, src+stride+1,stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_kk(dst, src, src+stride+1,stride, stride); \
 }\
 \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## cavs_filt ## SIZE ## _hv_qq(dst, src, src+stride+1,stride, stride); \
 }\
@@ -867,23 +867,27 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
 }\
 }\
 \
-static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t half[64];\
 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
 }\
 \
-static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
 }\
 \
-static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t half[64];\
 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
 }\
 \
-static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t half[64];\
 copy_block9(full, src, 16, stride, 9);\
@@ -891,20 +895,23 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
 }\
 \
-static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 copy_block9(full, src, 16, stride, 9);\
 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
 }\
 \
-static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t half[64];\
 copy_block9(full, src, 16, stride, 9);\
 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -915,7 +922,8 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
@@ -925,7 +933,8 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -936,7 +945,8 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
@@ -946,7 +956,8 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -957,7 +968,8 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
@@ -967,7 +979,8 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -978,7 +991,8 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
@@ -988,21 +1002,24 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[72];\
 uint8_t halfHV[64];\
 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -1013,7 +1030,8 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 copy_block9(full, src, 16, stride, 9);\
@@ -1021,7 +1039,8 @@ static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
-void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 uint8_t halfV[64];\
@@ -1032,7 +1051,8 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
 }\
-static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[16*9];\
 uint8_t halfH[72];\
 copy_block9(full, src, 16, stride, 9);\
@@ -1040,29 +1060,34 @@ static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
-static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[72];\
 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
 }\
 \
-static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t half[256];\
 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
 }\
 \
-static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
 }\
 \
-static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t half[256];\
 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
 }\
 \
-static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t half[256];\
 copy_block17(full, src, 24, stride, 17);\
@@ -1070,20 +1095,23 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
 }\
 \
-static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 copy_block17(full, src, 24, stride, 17);\
 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
 }\
 \
-static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t half[256];\
 copy_block17(full, src, 24, stride, 17);\
 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1094,7 +1122,8 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
@@ -1104,7 +1133,8 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1115,7 +1145,8 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
@@ -1125,7 +1156,8 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1136,7 +1168,8 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
@@ -1146,7 +1179,8 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1157,7 +1191,8 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
@@ -1167,21 +1202,24 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[272];\
 uint8_t halfHV[256];\
 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1192,7 +1230,8 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 copy_block17(full, src, 24, stride, 17);\
@@ -1200,7 +1239,8 @@ static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
-void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 uint8_t halfV[256];\
@@ -1211,7 +1251,8 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){
 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
 }\
-static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t full[24*17];\
 uint8_t halfH[272];\
 copy_block17(full, src, 24, stride, 17);\
@@ -1219,7 +1260,8 @@ static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
 }\
-static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
+{\
 uint8_t halfH[272];\
 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
@@ -1265,16 +1307,20 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 }
 #if CONFIG_RV40_DECODER
-void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 put_pixels16_xy2_8_c(dst, src, stride, 16);
 }
-void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 avg_pixels16_xy2_8_c(dst, src, stride, 16);
 }
-void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 put_pixels8_xy2_8_c(dst, src, stride, 8);
 }
-void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 avg_pixels8_xy2_8_c(dst, src, stride, 8);
 }
 #endif /* CONFIG_RV40_DECODER */
@@ -1308,27 +1354,32 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
 }
 }
-static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 uint8_t half[64];
 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
 }
-static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
 }
-static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 uint8_t half[64];
 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
 }
-static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
 }
-static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 uint8_t halfH[88];
 uint8_t halfV[64];
 uint8_t halfHV[64];
@@ -1337,7 +1388,8 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
-static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 uint8_t halfH[88];
 uint8_t halfV[64];
 uint8_t halfHV[64];
@@ -1346,7 +1398,8 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
 }
-static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 uint8_t halfH[88];
 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
@@ -50,10 +50,10 @@ extern uint32_t ff_squareTbl[512];
 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
 #define PUTAVG_PIXELS(depth)\
-void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);\
-void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);\
-void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);\
-void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
 PUTAVG_PIXELS( 8)
 PUTAVG_PIXELS( 9)
@@ -65,10 +65,10 @@ PUTAVG_PIXELS(10)
 #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
 /* RV40 functions */
-void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
-void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
@@ -91,14 +91,14 @@ could be reached easily ...
 //h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h);
 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
-typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);
 typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
 #define DEF_OLD_QPEL(name)\
-void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
-void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);\
-void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, ptrdiff_t stride);
 DEF_OLD_QPEL(qpel16_mc11_old_c)
 DEF_OLD_QPEL(qpel16_mc31_old_c)
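
The qpel_mc_func typedef change in the hunk above is what drives the rest of the series: that typedef is the element type of the quarter-pel function tables, so every function installed in those tables (the C templates below and the ARM NEON versions at the top of the diff) has to present the ptrdiff_t signature. A schematic of that wiring, with made-up names rather than the real init code:

    #include <stddef.h>
    #include <stdint.h>

    typedef void (*qpel_mc_func)(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

    /* Hypothetical stand-in for one converted function (mc00 is the full-pel
     * position, i.e. a plain block copy); the real implementations live in
     * the dsputil/h264qpel templates and the per-architecture files. */
    static void example_qpel8_mc00(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
    {
        for (int y = 0; y < 8; y++, dst += stride, src += stride)
            for (int x = 0; x < 8; x++)
                dst[x] = src[x];
    }

    /* A 16-entry table indexed by quarter-pel position, as the real contexts
     * use; only slot 0 (mc00) is filled in this sketch. */
    static qpel_mc_func example_put_qpel_tab[16] = { example_qpel8_mc00 };

    int main(void)
    {
        uint8_t src[8 * 8], dst[8 * 8];
        for (int i = 0; i < 8 * 8; i++)
            src[i] = (uint8_t)i;
        example_put_qpel_tab[0](dst, src, 8);   /* dispatch through the table */
        return dst[63] == 63 ? 0 : 1;
    }
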
@@ -421,15 +421,19 @@ PIXOP2(put, op_put)
 #undef op_avg
 #undef op_put
-void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 FUNCC(put_pixels8)(dst, src, stride, 8);
 }
-void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
+void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 FUNCC(avg_pixels8)(dst, src, stride, 8);
 }
-void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 FUNCC(put_pixels16)(dst, src, stride, 16);
 }
-void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
+void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
+{
 FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
...@@ -375,27 +375,32 @@ static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, u ...@@ -375,27 +375,32 @@ static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, u
}\ }\
#define H264_MC(OPNAME, SIZE) \ #define H264_MC(OPNAME, SIZE) \
static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\ static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\ FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\ FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\ FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
...@@ -404,14 +409,16 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *s ...@@ -404,14 +409,16 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\ FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\ uint8_t half[SIZE*SIZE*sizeof(pixel)];\
@@ -420,7 +427,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -431,7 +439,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -442,7 +451,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -453,7 +463,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -464,12 +475,14 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\ FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
@@ -478,7 +491,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
@@ -487,7 +501,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
@@ -499,7 +514,8 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *s
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\ }\
\ \
static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
...
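Note on the template conversion above: only the type of the stride argument changes; the bodies keep passing stride straight through to the copy/lowpass helpers, which use it in pointer arithmetic such as src - stride*2. ptrdiff_t is the type that pointer arithmetic actually produces, so the parameter now matches its use and negative strides stay well defined. A minimal sketch of the pattern follows (hypothetical helper name, not part of this patch):

/* Sketch only -- not part of this commit. Mirrors the copy_block##SIZE
 * helpers used above: the stride is added to byte pointers once per row,
 * so ptrdiff_t (the type of a pointer difference) is the natural
 * parameter type. */
#include <stddef.h>
#include <stdint.h>

static void copy_block_sketch(uint8_t *dst, const uint8_t *src,
                              ptrdiff_t dst_stride, ptrdiff_t src_stride,
                              int w, int h)
{
    int x, y;
    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++)
            dst[x] = src[x];
        dst += dst_stride;  /* no widening conversion needed on 64-bit targets */
        src += src_stride;
    }
}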
@@ -65,43 +65,51 @@
#undef PREFIX_h264_qpel16_hv_lowpass_num #undef PREFIX_h264_qpel16_hv_lowpass_num
#define H264_MC(OPNAME, SIZE, CODETYPE) \ #define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{ \
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
@@ -109,7 +117,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
@@ -117,7 +126,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
@@ -125,7 +135,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
@@ -133,12 +144,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
@@ -147,7 +160,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
@@ -156,7 +170,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
@@ -165,7 +180,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint
OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
...
@@ -210,35 +210,43 @@ static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, uint8_t *src, int d
\ \
#define RV30_MC(OPNAME, SIZE) \ #define RV30_MC(OPNAME, SIZE) \
static void OPNAME ## rv30_tpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 12, 6);\ OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 12, 6);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 6, 12);\ OPNAME ## rv30_tpel ## SIZE ## _h_lowpass(dst, src, stride, stride, 6, 12);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 12, 6);\ OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 12, 6);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 6, 12);\ OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 6, 12);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _hv_lowpass(dst, src, stride, stride);\ OPNAME ## rv30_tpel ## SIZE ## _hv_lowpass(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _hvv_lowpass(dst, src, stride, stride);\ OPNAME ## rv30_tpel ## SIZE ## _hvv_lowpass(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _hhv_lowpass(dst, src, stride, stride);\ OPNAME ## rv30_tpel ## SIZE ## _hhv_lowpass(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## rv30_tpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv30_tpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv30_tpel ## SIZE ## _hhvv_lowpass(dst, src, stride, stride);\ OPNAME ## rv30_tpel ## SIZE ## _hhvv_lowpass(dst, src, stride, stride);\
}\ }\
\ \
...
@@ -103,72 +103,84 @@ static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstS
\ \
#define RV40_MC(OPNAME, SIZE) \ #define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\ }\
\ \
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)];\ uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid = full + SIZE*2;\ uint8_t * const full_mid = full + SIZE*2;\
put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
...
@@ -627,10 +627,16 @@ VC1_MSPEL_MC(op_avg, avg_)
/* pixel functions - really are entry points to vc1_mspel_mc */ /* pixel functions - really are entry points to vc1_mspel_mc */
#define PUT_VC1_MSPEL(a, b)\ #define PUT_VC1_MSPEL(a, b)\
static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t stride, int rnd) \
{ \
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}\ } \
static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t stride, int rnd) \
{ \
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
} }
...
@@ -409,19 +409,23 @@ static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstS
}\ }\
#define CAVS_MC(OPNAME, SIZE, MMX) \ #define CAVS_MC(OPNAME, SIZE, MMX) \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\ }\
\ \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\ }\
\ \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\ }\
\ \
static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}\ }\
...
@@ -460,7 +460,7 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
} }
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
__asm__ volatile ( __asm__ volatile (
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
@@ -487,7 +487,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
} }
static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
int line_size, int h) ptrdiff_t line_size, int h)
{ {
__asm__ volatile ( __asm__ volatile (
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"REG_a" \n\t"
@@ -750,13 +750,13 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
#if HAVE_YASM #if HAVE_YASM
#define QPEL_OP(OPNAME, ROUNDER, RND, MMX) \ #define QPEL_OP(OPNAME, ROUNDER, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \ ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \ } \
\ \
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[8]; \ uint64_t temp[8]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -767,14 +767,14 @@ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \ ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
stride, 8); \ stride, 8); \
} \ } \
\ \
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[8]; \ uint64_t temp[8]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -785,7 +785,7 @@ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[8]; \ uint64_t temp[8]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -796,14 +796,14 @@ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \ ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
stride, stride); \ stride, stride); \
} \ } \
\ \
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[8]; \ uint64_t temp[8]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -814,7 +814,7 @@ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -829,7 +829,7 @@ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -844,7 +844,7 @@ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -859,7 +859,7 @@ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -874,7 +874,7 @@ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -887,7 +887,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half) + 64; \ uint8_t * const halfH = ((uint8_t*)half) + 64; \
@@ -900,7 +900,7 @@ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -913,7 +913,7 @@ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[8 + 9]; \ uint64_t half[8 + 9]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -926,7 +926,7 @@ static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[9]; \ uint64_t half[9]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -937,13 +937,13 @@ static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \ ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \ } \
\ \
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[32]; \ uint64_t temp[32]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -954,14 +954,14 @@ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \ ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
stride, stride, 16);\ stride, stride, 16);\
} \ } \
\ \
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[32]; \ uint64_t temp[32]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -972,7 +972,7 @@ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[32]; \ uint64_t temp[32]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -983,14 +983,14 @@ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \ ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
stride, stride); \ stride, stride); \
} \ } \
\ \
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t temp[32]; \ uint64_t temp[32]; \
uint8_t * const half = (uint8_t*)temp; \ uint8_t * const half = (uint8_t*)temp; \
@@ -1001,7 +1001,7 @@ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1017,7 +1017,7 @@ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1033,7 +1033,7 @@ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1049,7 +1049,7 @@ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1065,7 +1065,7 @@ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1079,7 +1079,7 @@ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[16 * 2 + 17 * 2]; \ uint64_t half[16 * 2 + 17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half) + 256; \ uint8_t * const halfH = ((uint8_t*)half) + 256; \
@@ -1093,7 +1093,7 @@ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[17 * 2]; \ uint64_t half[17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -1106,7 +1106,7 @@ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[17 * 2]; \ uint64_t half[17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -1119,7 +1119,7 @@ static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
} \ } \
\ \
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
uint64_t half[17 * 2]; \ uint64_t half[17 * 2]; \
uint8_t * const halfH = ((uint8_t*)half); \ uint8_t * const halfH = ((uint8_t*)half); \
@@ -1136,19 +1136,19 @@ QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, mmxext)
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
put_pixels8_xy2_mmx(dst, src, stride, 8); put_pixels8_xy2_mmx(dst, src, stride, 8);
} }
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
put_pixels16_xy2_mmx(dst, src, stride, 16); put_pixels16_xy2_mmx(dst, src, stride, 16);
} }
void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels8_xy2_mmx(dst, src, stride, 8); avg_pixels8_xy2_mmx(dst, src, stride, 8);
} }
void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels16_xy2_mmx(dst, src, stride, 16); avg_pixels16_xy2_mmx(dst, src, stride, 16);
} }
@@ -1278,29 +1278,29 @@ void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
/* CAVS-specific */ /* CAVS-specific */
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
put_pixels8_mmx(dst, src, stride, 8); put_pixels8_mmx(dst, src, stride, 8);
} }
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels8_mmx(dst, src, stride, 8); avg_pixels8_mmx(dst, src, stride, 8);
} }
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
put_pixels16_mmx(dst, src, stride, 16); put_pixels16_mmx(dst, src, stride, 16);
} }
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{ {
avg_pixels16_mmx(dst, src, stride, 16); avg_pixels16_mmx(dst, src, stride, 16);
} }
/* VC-1-specific */ /* VC-1-specific */
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
int stride, int rnd) ptrdiff_t stride, int rnd)
{ {
put_pixels8_mmx(dst, src, stride, 8); put_pixels8_mmx(dst, src, stride, 8);
} }
...
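The header hunk below only touches declarations, but it has to move in lockstep with the definitions changed above: these wrappers are installed into dsputil's function-pointer tables, and C requires the pointer's target type to match exactly, so int-stride and ptrdiff_t-stride variants cannot be mixed. A sketch of that constraint, with hypothetical names (not the real table types):

/* Sketch only -- not part of this commit; names are hypothetical. */
#include <stddef.h>
#include <stdint.h>

/* If the table's function-pointer type takes a ptrdiff_t stride... */
typedef void (*mc_func_sketch)(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

static void mc00_sketch(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    int x, y;
    for (y = 0; y < 8; y++)           /* plain 8x8 block copy */
        for (x = 0; x < 8; x++)
            dst[y * stride + x] = src[y * stride + x];
}

/* ...then every implementation assigned to it (C, MMX, NEON, ...) must be
 * declared with the same signature, which is why the prototypes in the
 * header below are converted together with their definitions. */
static const mc_func_sketch mc_tab_sketch[1] = { mc00_sketch };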
@@ -92,17 +92,17 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd);
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, ptrdiff_t stride);
void ff_deinterlace_line_mmx(uint8_t *dst, void ff_deinterlace_line_mmx(uint8_t *dst,
const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m4, const uint8_t *lum_m3,
...
@@ -250,81 +250,98 @@ H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
ff_put_pixels16_sse2(dst, src, stride, 16); ff_put_pixels16_sse2(dst, src, stride, 16);
} }
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src,
ptrdiff_t stride)
{
ff_avg_pixels16_sse2(dst, src, stride, 16); ff_avg_pixels16_sse2(dst, src, stride, 16);
} }
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\ ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
}\ }\
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \ #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}\ }\
#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
}\ }\
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\ DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\ uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
...@@ -333,7 +350,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t * ...@@ -333,7 +350,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\ uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
...@@ -342,7 +360,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t * ...@@ -342,7 +360,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\ uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
...@@ -351,7 +370,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t * ...@@ -351,7 +370,8 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\ }\
\ \
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\ uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
...@@ -398,7 +418,7 @@ H264_MC_816(H264_MC_HV, ssse3) ...@@ -398,7 +418,7 @@ H264_MC_816(H264_MC_HV, ssse3)
//10bit //10bit
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \ #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride); (uint8_t *dst, uint8_t *src, ptrdiff_t stride);
#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \ #define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \ LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
......
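The H264_MC_* macros above generate one wrapper per sub-pel position by token pasting. The self-contained sketch below mirrors that pattern for the mc00 case only; the stub stands in for the real ff_put_pixels16_* assembly and the instantiated name is illustrative, since the actual OPNAME/MMX pairs come from the H264_MC_816 invocations elsewhere in the file.

/* Sketch of the token-pasting pattern behind H264_MC_C: OPNAME/SIZE/MMX
 * are glued into both the wrapper name and the name of the copy
 * primitive it forwards to, now with a ptrdiff_t stride. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the real ff_put_pixels16_<cpu> assembly primitive. */
static void ff_put_pixels16_stub(uint8_t *dst, uint8_t *src,
                                 ptrdiff_t stride, int h)
{
    for (int y = 0; y < h; y++, dst += stride, src += stride)
        for (int x = 0; x < 16; x++)
            dst[x] = src[x];
}

#define H264_MC_C(OPNAME, SIZE, MMX) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX(uint8_t *dst, \
                                                         uint8_t *src, \
                                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE); \
}

H264_MC_C(put_, 16, stub)   /* expands to put_h264_qpel16_mc00_stub() */

int main(void)
{
    uint8_t src[16 * 16] = { [0] = 42 }, dst[16 * 16] = { 0 };
    put_h264_qpel16_mc00_stub(dst, src, 16);
    printf("%d\n", dst[0]);   /* prints "42" */
    return 0;
}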
...@@ -71,7 +71,7 @@ DECLARE_WEIGHT(ssse3) ...@@ -71,7 +71,7 @@ DECLARE_WEIGHT(ssse3)
#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \ #define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \
static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \
uint8_t *src, \ uint8_t *src, \
int stride) \ ptrdiff_t stride) \
{ \ { \
int i; \ int i; \
if (PH && PV) { \ if (PH && PV) { \
......
...@@ -63,7 +63,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) ...@@ -63,7 +63,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
} }
static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
int stride, int rnd) ptrdiff_t stride, int rnd)
{ {
ff_avg_pixels8_mmxext(dst, src, stride, 8); ff_avg_pixels8_mmxext(dst, src, stride, 8);
} }
......
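The mc00 case above has no sub-pel shift, so it reduces to averaging the 8x8 source block into dst via the shared pixel-average primitive. A minimal scalar sketch of that behaviour follows; the rounding rule (a + b + 1) >> 1 matches the usual pixel average, and the function name is invented for the example.

/* Standalone sketch of what the mc00 "avg" path boils down to: a rounding
 * average of an 8x8 block into dst, stepped by a pointer-sized stride. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void avg_block8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    for (int y = 0; y < 8; y++, dst += stride, src += stride)
        for (int x = 0; x < 8; x++)
            dst[x] = (uint8_t)((dst[x] + src[x] + 1) >> 1);  /* round up */
}

int main(void)
{
    uint8_t dst[8 * 8], src[8 * 8];
    for (int i = 0; i < 64; i++) {
        dst[i] = 10;
        src[i] = 21;
    }
    avg_block8_c(dst, src, 8);
    printf("%d\n", dst[0]);   /* (10 + 21 + 1) >> 1 = 16 */
    return 0;
}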
...@@ -463,12 +463,17 @@ VC1_MSPEL_MC(avg_) ...@@ -463,12 +463,17 @@ VC1_MSPEL_MC(avg_)
/** Macro to ease bicubic filter interpolation functions declarations */ /** Macro to ease bicubic filter interpolation functions declarations */
#define DECLARE_FUNCTION(a, b) \ #define DECLARE_FUNCTION(a, b) \
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t stride, \
int rnd) \
{ \
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}\ }\
static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \ static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \
const uint8_t *src, \ const uint8_t *src, \
int stride, int rnd) \ ptrdiff_t stride, \
int rnd) \
{ \ { \
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
} }
......
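Taken together, the conversions in these files give every put/avg sub-pel variant the same ptrdiff_t-stride signature, which is what lets them share one function-pointer table in the DSP contexts. The sketch below illustrates that idea only; the table and function names are loosely modelled on the library's and should be read as assumptions, with a trivial stub in place of the real kernels.

/* Minimal sketch of why a uniform signature matters: once every variant
 * takes (dst, src, ptrdiff_t stride, rnd), the decoder can index them
 * from one table without casts or thunks. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef void (*vc1_mspel_mc_func)(uint8_t *dst, const uint8_t *src,
                                  ptrdiff_t stride, int rnd);

static void put_mc00_stub(uint8_t *dst, const uint8_t *src,
                          ptrdiff_t stride, int rnd)
{
    (void)rnd;                               /* full-pel: rounding unused */
    for (int y = 0; y < 8; y++, dst += stride, src += stride)
        for (int x = 0; x < 8; x++)
            dst[x] = src[x];
}

int main(void)
{
    /* 4x4 grid indexed by (vertical << 2) | horizontal sub-pel position;
     * only the full-pel slot is populated in this sketch. */
    vc1_mspel_mc_func put_mspel_tab[16] = { put_mc00_stub };

    uint8_t src[8 * 8], dst[8 * 8] = { 0 };
    for (int i = 0; i < 64; i++)
        src[i] = (uint8_t)i;
    put_mspel_tab[0](dst, src, 8, 0);
    printf("%d\n", dst[63]);                 /* prints "63" */
    return 0;
}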