Commit 368f5035 authored by Diego Biurrun

dsputil: Split off quarterpel bits into their own context

parent cb52a17c
...@@ -1551,6 +1551,7 @@ CONFIG_EXTRA=" ...@@ -1551,6 +1551,7 @@ CONFIG_EXTRA="
mpegvideo mpegvideo
mpegvideoenc mpegvideoenc
nettle nettle
qpeldsp
rangecoder rangecoder
riffdec riffdec
riffenc riffenc
...@@ -1704,7 +1705,7 @@ rdft_select="fft" ...@@ -1704,7 +1705,7 @@ rdft_select="fft"
mpegaudio_select="mpegaudiodsp" mpegaudio_select="mpegaudiodsp"
mpegaudiodsp_select="dct" mpegaudiodsp_select="dct"
mpegvideo_select="dsputil hpeldsp videodsp" mpegvideo_select="dsputil hpeldsp videodsp"
mpegvideoenc_select="dsputil mpegvideo" mpegvideoenc_select="dsputil mpegvideo qpeldsp"
# decoders / encoders # decoders / encoders
aac_decoder_select="mdct sinewin" aac_decoder_select="mdct sinewin"
...@@ -1730,7 +1731,7 @@ atrac3p_decoder_select="mdct sinewin" ...@@ -1730,7 +1731,7 @@ atrac3p_decoder_select="mdct sinewin"
bink_decoder_select="dsputil hpeldsp" bink_decoder_select="dsputil hpeldsp"
binkaudio_dct_decoder_select="mdct rdft dct sinewin" binkaudio_dct_decoder_select="mdct rdft dct sinewin"
binkaudio_rdft_decoder_select="mdct rdft sinewin" binkaudio_rdft_decoder_select="mdct rdft sinewin"
cavs_decoder_select="dsputil golomb h264chroma videodsp" cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp"
cllc_decoder_select="dsputil" cllc_decoder_select="dsputil"
comfortnoise_encoder_select="lpc" comfortnoise_encoder_select="lpc"
cook_decoder_select="dsputil mdct sinewin" cook_decoder_select="dsputil mdct sinewin"
...@@ -1766,7 +1767,7 @@ g2m_decoder_deps="zlib" ...@@ -1766,7 +1767,7 @@ g2m_decoder_deps="zlib"
g2m_decoder_select="dsputil" g2m_decoder_select="dsputil"
h261_decoder_select="error_resilience mpegvideo" h261_decoder_select="error_resilience mpegvideo"
h261_encoder_select="aandcttables mpegvideoenc" h261_encoder_select="aandcttables mpegvideoenc"
h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo" h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp"
h263_encoder_select="aandcttables h263dsp mpegvideoenc" h263_encoder_select="aandcttables h263dsp mpegvideoenc"
h263i_decoder_select="h263_decoder" h263i_decoder_select="h263_decoder"
h263p_encoder_select="h263_encoder" h263p_encoder_select="h263_encoder"
...@@ -1818,7 +1819,7 @@ msmpeg4v2_decoder_select="h263_decoder" ...@@ -1818,7 +1819,7 @@ msmpeg4v2_decoder_select="h263_decoder"
msmpeg4v2_encoder_select="h263_encoder" msmpeg4v2_encoder_select="h263_encoder"
msmpeg4v3_decoder_select="h263_decoder" msmpeg4v3_decoder_select="h263_decoder"
msmpeg4v3_encoder_select="h263_encoder" msmpeg4v3_encoder_select="h263_encoder"
mss2_decoder_select="error_resilience vc1_decoder" mss2_decoder_select="error_resilience qpeldsp vc1_decoder"
mxpeg_decoder_select="mjpeg_decoder" mxpeg_decoder_select="mjpeg_decoder"
nellymoser_decoder_select="mdct sinewin" nellymoser_decoder_select="mdct sinewin"
nellymoser_encoder_select="audio_frame_queue mdct sinewin" nellymoser_encoder_select="audio_frame_queue mdct sinewin"
...@@ -1860,7 +1861,7 @@ twinvq_decoder_select="mdct lsp sinewin" ...@@ -1860,7 +1861,7 @@ twinvq_decoder_select="mdct lsp sinewin"
utvideo_decoder_select="dsputil" utvideo_decoder_select="dsputil"
utvideo_encoder_select="dsputil huffman huffyuvencdsp" utvideo_encoder_select="dsputil huffman huffyuvencdsp"
vble_decoder_select="huffyuvdsp" vble_decoder_select="huffyuvdsp"
vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8" vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
vc1image_decoder_select="vc1_decoder" vc1image_decoder_select="vc1_decoder"
vorbis_decoder_select="mdct" vorbis_decoder_select="mdct"
vorbis_encoder_select="mdct" vorbis_encoder_select="mdct"
...@@ -1936,8 +1937,8 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" ...@@ -1936,8 +1937,8 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
# parsers # parsers
h264_parser_select="h264_decoder" h264_parser_select="h264_decoder"
mpeg4video_parser_select="error_resilience h263dsp mpegvideo"
mpegvideo_parser_select="error_resilience mpegvideo" mpegvideo_parser_select="error_resilience mpegvideo"
mpeg4video_parser_select="error_resilience h263dsp mpegvideo qpeldsp"
vc1_parser_select="mpegvideo" vc1_parser_select="mpegvideo"
# external libraries # external libraries
......
...@@ -63,6 +63,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o \ ...@@ -63,6 +63,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o \
mpegutils.o mpegutils.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
motion_est.o ratecontrol.o motion_est.o ratecontrol.o
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "golomb.h" #include "golomb.h"
#include "h264chroma.h" #include "h264chroma.h"
#include "mathops.h" #include "mathops.h"
#include "qpeldsp.h"
#include "cavs.h" #include "cavs.h"
static const uint8_t alpha_tab[64] = { static const uint8_t alpha_tab[64] = {
......
...@@ -23,7 +23,9 @@ ...@@ -23,7 +23,9 @@
#define AVCODEC_CAVSDSP_H #define AVCODEC_CAVSDSP_H
#include <stdint.h> #include <stdint.h>
#include "dsputil.h"
#include "avcodec.h"
#include "qpeldsp.h"
typedef struct CAVSDSPContext { typedef struct CAVSDSPContext {
qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
......
...@@ -48,9 +48,6 @@ uint32_t ff_square_tab[512] = { 0, }; ...@@ -48,9 +48,6 @@ uint32_t ff_square_tab[512] = { 0, };
#undef BIT_DEPTH #undef BIT_DEPTH
#define BIT_DEPTH 8 #define BIT_DEPTH 8
#include "hpel_template.c"
#include "tpel_template.c"
#include "dsputil_template.c"
#include "dsputilenc_template.c" #include "dsputilenc_template.c"
/* Input permutation for the simple_idct_mmx */ /* Input permutation for the simple_idct_mmx */
...@@ -485,701 +482,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, ...@@ -485,701 +482,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
} }
} }
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride, \
int h) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
int i; \
\
for (i = 0; i < h; i++) { \
OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
dst += dstStride; \
src += srcStride; \
} \
} \
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
const int w = 8; \
int i; \
\
for (i = 0; i < w; i++) { \
const int src0 = src[0 * srcStride]; \
const int src1 = src[1 * srcStride]; \
const int src2 = src[2 * srcStride]; \
const int src3 = src[3 * srcStride]; \
const int src4 = src[4 * srcStride]; \
const int src5 = src[5 * srcStride]; \
const int src6 = src[6 * srcStride]; \
const int src7 = src[7 * srcStride]; \
const int src8 = src[8 * srcStride]; \
OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
dst++; \
src++; \
} \
} \
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride, \
int h) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
int i; \
\
for (i = 0; i < h; i++) { \
OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \
OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \
OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \
OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \
OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \
OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \
OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \
OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \
OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
dst += dstStride; \
src += srcStride; \
} \
} \
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
const int w = 16; \
int i; \
\
for (i = 0; i < w; i++) { \
const int src0 = src[0 * srcStride]; \
const int src1 = src[1 * srcStride]; \
const int src2 = src[2 * srcStride]; \
const int src3 = src[3 * srcStride]; \
const int src4 = src[4 * srcStride]; \
const int src5 = src[5 * srcStride]; \
const int src6 = src[6 * srcStride]; \
const int src7 = src[7 * srcStride]; \
const int src8 = src[8 * srcStride]; \
const int src9 = src[9 * srcStride]; \
const int src10 = src[10 * srcStride]; \
const int src11 = src[11 * srcStride]; \
const int src12 = src[12 * srcStride]; \
const int src13 = src[13 * srcStride]; \
const int src14 = src[14 * srcStride]; \
const int src15 = src[15 * srcStride]; \
const int src16 = src[16 * srcStride]; \
OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \
OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \
OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \
OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \
OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \
OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \
OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \
OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \
OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
dst++; \
src++; \
} \
} \
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t half[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
\
copy_block9(full, src, 16, stride, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \
} \
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t half[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t half[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
\
copy_block17(full, src, 24, stride, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \
} \
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t half[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
}
/*
 * Store operations handed to QPEL_MC as its OP argument.  `a` is the
 * destination sample and `b` the raw filter sum.  The sum is biased (+16 for
 * the rounding variants, +15 for the no_rnd variants), shifted right by 5
 * and looked up through `cm`, which every generated lowpass function defines
 * as ff_crop_tab + MAX_NEG_CROP (presumably a clamping table - confirm).
 * The avg variants additionally average that value with the existing `a`.
 */
#define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
/* NOTE(review): op_avg_no_rnd is neither used by the instantiations below
 * nor #undef'd in the lines that follow - confirm whether that is intended. */
#define op_avg_no_rnd(a, b) a = (((a) + cm[((b) + 15) >> 5]) >> 1)
#define op_put(a, b) a = cm[((b) + 16) >> 5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
/* put_*: intermediates via put_mpeg4_*, result stored with op_put. */
QPEL_MC(0, put_, _, op_put)
/* put_no_rnd_*: intermediates via put_no_rnd_mpeg4_*, stored with op_put_no_rnd. */
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
/* avg_*: intermediates via put_mpeg4_*, result merged into dst with op_avg. */
QPEL_MC(0, avg_, _, op_avg)
#undef op_avg
#undef op_put
#undef op_put_no_rnd
/**
 * Exported 8x8 wrapper: forwards to put_pixels8_8_c() (defined outside this
 * excerpt) with the height argument fixed to 8.
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
put_pixels8_8_c(dst, src, stride, 8);
}
/**
 * Exported 8x8 wrapper: forwards to avg_pixels8_8_c() (defined outside this
 * excerpt) with the height argument fixed to 8.
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
avg_pixels8_8_c(dst, src, stride, 8);
}
/**
 * Exported 16x16 wrapper: forwards to put_pixels16_8_c() (defined outside
 * this excerpt) with the height argument fixed to 16.
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
put_pixels16_8_c(dst, src, stride, 16);
}
/**
 * Exported 16x16 wrapper: forwards to avg_pixels16_8_c() (defined outside
 * this excerpt) with the height argument fixed to 16.
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
avg_pixels16_8_c(dst, src, stride, 16);
}
/*
 * QPEL_MC generates no mc00 function, so the mc00 names that the dspfunc()
 * table setup pastes together are aliased to the plain block wrappers.
 * The put_no_rnd variants map to the same put wrappers as the rounding ones.
 */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/**
 * Exported wrapper around put_pixels8_l2_8() (defined outside this excerpt);
 * all arguments are passed through unchanged.
 * NOTE(review): presumably combines the two sources src1 and src2 into dst
 * over h rows - confirm against the helper's definition.
 */
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
int dst_stride, int src_stride1, int src_stride2,
int h)
{
put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
}
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h) int line_size, int h)
{ {
...@@ -2198,35 +1500,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ...@@ -2198,35 +1500,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->pix_abs[1][2] = pix_abs8_y2_c; c->pix_abs[1][2] = pix_abs8_y2_c;
c->pix_abs[1][3] = pix_abs8_xy2_c; c->pix_abs[1][3] = pix_abs8_xy2_c;
/*
 * Fill one 16-entry row of c-><PFX>_pixels_tab[IDX] with the C functions
 * named <PFX><NUM>_mcXY_c. Entry X + 4 * Y receives the _mcXY_ function
 * (e.g. [1] = mc10, [4] = mc01, [15] = mc33).
 * Row 0 is populated with the 16-wide functions and row 1 with the 8-wide
 * ones, for each of the put_qpel, put_no_rnd_qpel and avg_qpel tables.
 * The macro is local to this spot and is undefined again right after use.
 */
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \
c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \
c->PFX ## _pixels_tab[IDX][2] = PFX ## NUM ## _mc20_c; \
c->PFX ## _pixels_tab[IDX][3] = PFX ## NUM ## _mc30_c; \
c->PFX ## _pixels_tab[IDX][4] = PFX ## NUM ## _mc01_c; \
c->PFX ## _pixels_tab[IDX][5] = PFX ## NUM ## _mc11_c; \
c->PFX ## _pixels_tab[IDX][6] = PFX ## NUM ## _mc21_c; \
c->PFX ## _pixels_tab[IDX][7] = PFX ## NUM ## _mc31_c; \
c->PFX ## _pixels_tab[IDX][8] = PFX ## NUM ## _mc02_c; \
c->PFX ## _pixels_tab[IDX][9] = PFX ## NUM ## _mc12_c; \
c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
dspfunc(put_qpel, 0, 16);
dspfunc(put_qpel, 1, 8);
dspfunc(put_no_rnd_qpel, 0, 16);
dspfunc(put_no_rnd_qpel, 1, 8);
dspfunc(avg_qpel, 0, 16);
dspfunc(avg_qpel, 1, 8);
#undef dspfunc
#define SET_CMP_FUNC(name) \ #define SET_CMP_FUNC(name) \
c->name[0] = name ## 16_c; \ c->name[0] = name ## 16_c; \
c->name[1] = name ## 8x8_c; c->name[1] = name ## 8x8_c;
......
...@@ -34,15 +34,6 @@ ...@@ -34,15 +34,6 @@
extern uint32_t ff_square_tab[512]; extern uint32_t ff_square_tab[512];
/* Fixed-size put/avg wrappers; they share the (dst, src, stride) signature. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
/* Two-source variant: each buffer has its own stride, h is passed through. */
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
int dst_stride, int src_stride1, int src_stride2,
int h);
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int dxx, int dxy, int dyx, int dyy, int shift, int r,
int width, int height); int width, int height);
...@@ -64,33 +55,9 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, ...@@ -64,33 +55,9 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
* Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
* h for op_pixels_func is limited to { width / 2, width }, * h for op_pixels_func is limited to { width / 2, width },
* but never larger than 16 and never smaller than 4. */ * but never larger than 16 and never smaller than 4. */
/* Pointer type for a quarter-pel function taking (dst, src, stride);
 * the block size is not a parameter of this signature. */
typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
uint8_t *src /* align 1 */, ptrdiff_t stride);
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */, typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
uint8_t value, int line_size, int h); uint8_t value, int line_size, int h);
/*
 * Declare the three non-static variants of one "old" quarter-pel function:
 * ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>, all sharing the
 * (dst, src, stride) signature.
 * It is expanded below for the 16- and 8-wide mc11, mc31, mc12, mc32, mc13
 * and mc33 positions.
 */
#define DEF_OLD_QPEL(name) \
void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride); \
void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride); \
void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride);
DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
struct MpegEncContext; struct MpegEncContext;
/* Motion estimation: /* Motion estimation:
* h is limited to { width / 2, width, 2 * width }, * h is limited to { width / 2, width, 2 * width },
...@@ -174,10 +141,6 @@ typedef struct DSPContext { ...@@ -174,10 +141,6 @@ typedef struct DSPContext {
me_cmp_func ildct_cmp[6]; // only width 16 used me_cmp_func ildct_cmp[6]; // only width 16 used
me_cmp_func frame_skip_cmp[6]; // only width 8 used me_cmp_func frame_skip_cmp[6]; // only width 8 used
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
me_cmp_func pix_abs[2][4]; me_cmp_func pix_abs[2][4];
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "mpeg4video_parser.h" #include "mpeg4video_parser.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "msmpeg4.h" #include "msmpeg4.h"
#include "qpeldsp.h"
#include "thread.h" #include "thread.h"
av_cold int ff_h263_decode_init(AVCodecContext *avctx) av_cold int ff_h263_decode_init(AVCodecContext *avctx)
...@@ -116,6 +117,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) ...@@ -116,6 +117,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
return ret; return ret;
ff_h263dsp_init(&s->h263dsp); ff_h263dsp_init(&s->h263dsp);
ff_qpeldsp_init(&s->qdsp);
ff_h263_decode_init_vlc(); ff_h263_decode_init_vlc();
return 0; return 0;
...@@ -461,9 +463,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -461,9 +463,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
avctx->has_b_frames = !s->low_delay; avctx->has_b_frames = !s->low_delay;
#define SET_QPEL_FUNC(postfix1, postfix2) \ #define SET_QPEL_FUNC(postfix1, postfix2) \
s->dsp.put_ ## postfix1 = ff_put_ ## postfix2; \ s->qdsp.put_ ## postfix1 = ff_put_ ## postfix2; \
s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2; \ s->qdsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2; \
s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2; s->qdsp.avg_ ## postfix1 = ff_avg_ ## postfix2;
if (s->workaround_bugs & FF_BUG_STD_QPEL) { if (s->workaround_bugs & FF_BUG_STD_QPEL) {
SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c) SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c)
...@@ -527,11 +529,11 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ...@@ -527,11 +529,11 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
} }
if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) { if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
s->me.qpel_put = s->dsp.put_qpel_pixels_tab; s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
} else { } else {
s->me.qpel_put = s->dsp.put_no_rnd_qpel_pixels_tab; s->me.qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
} }
if ((ret = ff_MPV_frame_start(s, avctx)) < 0) if ((ret = ff_MPV_frame_start(s, avctx)) < 0)
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "h264qpel.h" #include "h264qpel.h"
#include "mpegutils.h" #include "mpegutils.h"
#include "parser.h" #include "parser.h"
#include "qpeldsp.h"
#include "rectangle.h" #include "rectangle.h"
#include "videodsp.h" #include "videodsp.h"
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "avcodec.h" #include "avcodec.h"
#include "h264.h" #include "h264.h"
#include "qpeldsp.h"
#include "svq3.h" #include "svq3.h"
#include "thread.h" #include "thread.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef AVCODEC_H264QPEL_H #ifndef AVCODEC_H264QPEL_H
#define AVCODEC_H264QPEL_H #define AVCODEC_H264QPEL_H
#include "dsputil.h" #include "qpeldsp.h"
typedef struct H264QpelContext { typedef struct H264QpelContext {
qpel_mc_func put_h264_qpel_pixels_tab[4][16]; qpel_mc_func put_h264_qpel_pixels_tab[4][16];
......
...@@ -329,9 +329,11 @@ int ff_init_me(MpegEncContext *s){ ...@@ -329,9 +329,11 @@ int ff_init_me(MpegEncContext *s){
/*FIXME s->no_rounding b_type*/ /*FIXME s->no_rounding b_type*/
if(s->flags&CODEC_FLAG_QPEL){ if(s->flags&CODEC_FLAG_QPEL){
c->sub_motion_search= qpel_motion_search; c->sub_motion_search= qpel_motion_search;
c->qpel_avg= s->dsp.avg_qpel_pixels_tab; c->qpel_avg = s->qdsp.avg_qpel_pixels_tab;
if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab; if (s->no_rounding)
else c->qpel_put= s->dsp.put_qpel_pixels_tab; c->qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
else
c->qpel_put = s->qdsp.put_qpel_pixels_tab;
}else{ }else{
if(c->avctx->me_sub_cmp&FF_CMP_CHROMA) if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
c->sub_motion_search= hpel_motion_search; c->sub_motion_search= hpel_motion_search;
...@@ -622,9 +624,9 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) ...@@ -622,9 +624,9 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
dxy = ((my4 & 3) << 2) | (mx4 & 3); dxy = ((my4 & 3) << 2) | (mx4 & 3);
if(s->no_rounding) if(s->no_rounding)
s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride); s->qdsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
else else
s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); s->qdsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
}else{ }else{
uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride; uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
dxy = ((my4 & 1) << 1) | (mx4 & 1); dxy = ((my4 & 1) << 1) | (mx4 & 1);
...@@ -1208,14 +1210,14 @@ static inline int check_bidir_mv(MpegEncContext * s, ...@@ -1208,14 +1210,14 @@ static inline int check_bidir_mv(MpegEncContext * s,
src_y = motion_fy >> 2; src_y = motion_fy >> 2;
ptr = ref_data[0] + (src_y * stride) + src_x; ptr = ref_data[0] + (src_y * stride) + src_x;
s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride); s->qdsp.put_qpel_pixels_tab[0][dxy](dest_y, ptr, stride);
dxy = ((motion_by & 3) << 2) | (motion_bx & 3); dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
src_x = motion_bx >> 2; src_x = motion_bx >> 2;
src_y = motion_by >> 2; src_y = motion_by >> 2;
ptr = ref2_data[0] + (src_y * stride) + src_x; ptr = ref2_data[0] + (src_y * stride) + src_x;
s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride); s->qdsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);
}else{ }else{
dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
src_x = motion_fx >> 1; src_x = motion_fx >> 1;
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "mpegvideo.h" #include "mpegvideo.h"
#include "mjpegenc.h" #include "mjpegenc.h"
#include "msmpeg4.h" #include "msmpeg4.h"
#include "qpeldsp.h"
#include "xvmc_internal.h" #include "xvmc_internal.h"
#include "thread.h" #include "thread.h"
#include <limits.h> #include <limits.h>
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "ratecontrol.h" #include "ratecontrol.h"
#include "parser.h" #include "parser.h"
#include "mpeg12data.h" #include "mpeg12data.h"
#include "qpeldsp.h"
#include "rl.h" #include "rl.h"
#include "thread.h" #include "thread.h"
#include "videodsp.h" #include "videodsp.h"
...@@ -348,6 +349,7 @@ typedef struct MpegEncContext { ...@@ -348,6 +349,7 @@ typedef struct MpegEncContext {
DSPContext dsp; ///< pointers for accelerated dsp functions DSPContext dsp; ///< pointers for accelerated dsp functions
HpelDSPContext hdsp; HpelDSPContext hdsp;
QpelDSPContext qdsp;
VideoDSPContext vdsp; VideoDSPContext vdsp;
H263DSPContext h263dsp; H263DSPContext h263dsp;
int f_code; ///< forward MV resolution int f_code; ///< forward MV resolution
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "mpegutils.h" #include "mpegutils.h"
#include "mjpegenc.h" #include "mjpegenc.h"
#include "msmpeg4.h" #include "msmpeg4.h"
#include "qpeldsp.h"
#include "faandct.h" #include "faandct.h"
#include "thread.h" #include "thread.h"
#include "aandcttab.h" #include "aandcttab.h"
...@@ -687,6 +688,8 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) ...@@ -687,6 +688,8 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
if (ARCH_X86) if (ARCH_X86)
ff_MPV_encode_init_x86(s); ff_MPV_encode_init_x86(s);
ff_qpeldsp_init(&s->qdsp);
s->avctx->coded_frame = s->current_picture.f; s->avctx->coded_frame = s->current_picture.f;
if (s->msmpeg4_version) { if (s->msmpeg4_version) {
...@@ -1944,10 +1947,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, ...@@ -1944,10 +1947,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) { if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
op_pix = s->hdsp.put_pixels_tab; op_pix = s->hdsp.put_pixels_tab;
op_qpix = s->dsp.put_qpel_pixels_tab; op_qpix = s->qdsp.put_qpel_pixels_tab;
} else { } else {
op_pix = s->hdsp.put_no_rnd_pixels_tab; op_pix = s->hdsp.put_no_rnd_pixels_tab;
op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab; op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
} }
if (s->mv_dir & MV_DIR_FORWARD) { if (s->mv_dir & MV_DIR_FORWARD) {
...@@ -1955,7 +1958,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, ...@@ -1955,7 +1958,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
s->last_picture.f->data, s->last_picture.f->data,
op_pix, op_qpix); op_pix, op_qpix);
op_pix = s->hdsp.avg_pixels_tab; op_pix = s->hdsp.avg_pixels_tab;
op_qpix = s->dsp.avg_qpel_pixels_tab; op_qpix = s->qdsp.avg_qpel_pixels_tab;
} }
if (s->mv_dir & MV_DIR_BACKWARD) { if (s->mv_dir & MV_DIR_BACKWARD) {
ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1, ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "mpegvideo.h" #include "mpegvideo.h"
#include "mjpegenc.h" #include "mjpegenc.h"
#include "msmpeg4.h" #include "msmpeg4.h"
#include "qpeldsp.h"
#include <limits.h> #include <limits.h>
static void gmc1_motion(MpegEncContext *s, static void gmc1_motion(MpegEncContext *s,
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "error_resilience.h" #include "error_resilience.h"
#include "internal.h" #include "internal.h"
#include "msmpeg4data.h" #include "msmpeg4data.h"
#include "qpeldsp.h"
#include "vc1.h" #include "vc1.h"
#include "mss12.h" #include "mss12.h"
#include "mss2dsp.h" #include "mss2dsp.h"
...@@ -37,6 +38,7 @@ typedef struct MSS2Context { ...@@ -37,6 +38,7 @@ typedef struct MSS2Context {
AVFrame *last_pic; AVFrame *last_pic;
MSS12Context c; MSS12Context c;
MSS2DSPContext dsp; MSS2DSPContext dsp;
QpelDSPContext qdsp;
SliceContext sc[2]; SliceContext sc[2];
} MSS2Context; } MSS2Context;
...@@ -787,8 +789,8 @@ static av_cold int wmv9_init(AVCodecContext *avctx) ...@@ -787,8 +789,8 @@ static av_cold int wmv9_init(AVCodecContext *avctx)
return ret; return ret;
/* error concealment */ /* error concealment */
v->s.me.qpel_put = v->s.dsp.put_qpel_pixels_tab; v->s.me.qpel_put = v->s.qdsp.put_qpel_pixels_tab;
v->s.me.qpel_avg = v->s.dsp.avg_qpel_pixels_tab; v->s.me.qpel_avg = v->s.qdsp.avg_qpel_pixels_tab;
return 0; return 0;
} }
...@@ -827,6 +829,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx) ...@@ -827,6 +829,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx)
return ret; return ret;
} }
ff_mss2dsp_init(&ctx->dsp); ff_mss2dsp_init(&ctx->dsp);
ff_qpeldsp_init(&ctx->qdsp);
avctx->pix_fmt = c->free_colours == 127 ? AV_PIX_FMT_RGB555 avctx->pix_fmt = c->free_colours == 127 ? AV_PIX_FMT_RGB555
: AV_PIX_FMT_RGB24; : AV_PIX_FMT_RGB24;
......
/* /*
* DSP utils * quarterpel DSP function templates
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
* *
* This file is part of Libav. * This file is part of Libav.
* *
...@@ -24,7 +20,7 @@ ...@@ -24,7 +20,7 @@
/** /**
* @file * @file
* DSP utils * quarterpel DSP function templates
*/ */
#define PIXOP2(OPNAME, OP) \ #define PIXOP2(OPNAME, OP) \
......
/*
* quarterpel DSP functions
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* quarterpel DSP functions
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "copy_block.h"
#include "qpeldsp.h"
#define BIT_DEPTH 8
#include "hpel_template.c"
#include "tpel_template.c"
#include "qpel_template.c"
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride, \
int h) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
int i; \
\
for (i = 0; i < h; i++) { \
OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
dst += dstStride; \
src += srcStride; \
} \
} \
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
const int w = 8; \
int i; \
\
for (i = 0; i < w; i++) { \
const int src0 = src[0 * srcStride]; \
const int src1 = src[1 * srcStride]; \
const int src2 = src[2 * srcStride]; \
const int src3 = src[3 * srcStride]; \
const int src4 = src[4 * srcStride]; \
const int src5 = src[5 * srcStride]; \
const int src6 = src[6 * srcStride]; \
const int src7 = src[7 * srcStride]; \
const int src8 = src[8 * srcStride]; \
OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
dst++; \
src++; \
} \
} \
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride, \
int h) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
int i; \
\
for (i = 0; i < h; i++) { \
OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[9])); \
OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[9]) * 3 - (src[3] + src[10])); \
OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[9]) * 6 + (src[5] + src[10]) * 3 - (src[4] + src[11])); \
OP(dst[8], (src[8] + src[9]) * 20 - (src[7] + src[10]) * 6 + (src[6] + src[11]) * 3 - (src[5] + src[12])); \
OP(dst[9], (src[9] + src[10]) * 20 - (src[8] + src[11]) * 6 + (src[7] + src[12]) * 3 - (src[6] + src[13])); \
OP(dst[10], (src[10] + src[11]) * 20 - (src[9] + src[12]) * 6 + (src[8] + src[13]) * 3 - (src[7] + src[14])); \
OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9] + src[14]) * 3 - (src[8] + src[15])); \
OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9] + src[16])); \
OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
dst += dstStride; \
src += srcStride; \
} \
} \
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, \
int dstStride, int srcStride) \
{ \
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
const int w = 16; \
int i; \
\
for (i = 0; i < w; i++) { \
const int src0 = src[0 * srcStride]; \
const int src1 = src[1 * srcStride]; \
const int src2 = src[2 * srcStride]; \
const int src3 = src[3 * srcStride]; \
const int src4 = src[4 * srcStride]; \
const int src5 = src[5 * srcStride]; \
const int src6 = src[6 * srcStride]; \
const int src7 = src[7 * srcStride]; \
const int src8 = src[8 * srcStride]; \
const int src9 = src[9 * srcStride]; \
const int src10 = src[10 * srcStride]; \
const int src11 = src[11 * srcStride]; \
const int src12 = src[12 * srcStride]; \
const int src13 = src[13 * srcStride]; \
const int src14 = src[14 * srcStride]; \
const int src15 = src[15 * srcStride]; \
const int src16 = src[16 * srcStride]; \
OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src9)); \
OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src9) * 3 - (src3 + src10)); \
OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src9) * 6 + (src5 + src10) * 3 - (src4 + src11)); \
OP(dst[8 * dstStride], (src8 + src9) * 20 - (src7 + src10) * 6 + (src6 + src11) * 3 - (src5 + src12)); \
OP(dst[9 * dstStride], (src9 + src10) * 20 - (src8 + src11) * 6 + (src7 + src12) * 3 - (src6 + src13)); \
OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9 + src12) * 6 + (src8 + src13) * 3 - (src7 + src14)); \
OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9 + src14) * 3 - (src8 + src15)); \
OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9 + src16)); \
OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
dst++; \
src++; \
} \
} \
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8); \
OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t half[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
\
copy_block9(full, src, 16, stride, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16); \
} \
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t half[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16); \
OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV, \
stride, 16, 8, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
uint8_t halfHV[64]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
uint8_t halfV[64]; \
uint8_t halfHV[64]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16); \
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8); \
OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[16 * 9]; \
uint8_t halfH[72]; \
\
copy_block9(full, src, 16, stride, 9); \
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9); \
put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[72]; \
\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9); \
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8); \
} \
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t half[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16); \
OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t half[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
\
copy_block17(full, src, 24, stride, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24); \
} \
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t half[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24); \
OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV, \
stride, 24, 16, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
uint8_t halfHV[256]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
} \
\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
uint8_t halfV[256]; \
uint8_t halfHV[256]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24); \
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16); \
OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t full[24 * 17]; \
uint8_t halfH[272]; \
\
copy_block17(full, src, 24, stride, 17); \
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17); \
put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint8_t halfH[272]; \
\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17); \
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16); \
}
/*
 * Store operations handed to QPEL_MC as its last argument. In each of them
 * `b` is shifted right by 5 (i.e. scaled down by 32) after a bias is added,
 * and the result is used as an index into `cm`.
 * NOTE(review): `cm` is not defined here and has to be in scope wherever the
 * operation is expanded - presumably a clipping table; confirm in QPEL_MC.
 */
/* Average the value already held in `a` with the looked-up value; the +1
 * before the final shift rounds that average up. */
#define op_avg(a, b) a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
/* Overwrite `a`; the bias of 16 rounds the division by 32 to nearest. */
#define op_put(a, b) a = cm[((b) + 16) >> 5]
/* Like op_put, but with a bias of 15 so that exact halves round down. */
#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
/*
 * Instantiate the three function families. The second argument is pasted in
 * front of the generated function names (put_qpel..., put_no_rnd_qpel...,
 * avg_qpel...), the third one between "put" and the name of the low-pass /
 * pixel helpers used for intermediate buffers; the avg_ family therefore
 * uses the same rounding put_ helpers as the put_ family.
 * NOTE(review): the first argument (0/1) presumably flags the no-rounding
 * variant - confirm against the QPEL_MC parameter list.
 */
QPEL_MC(0, put_, _, op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_, _, op_avg)
/* The store operations are only needed for the instantiations above. */
#undef op_avg
#undef op_put
#undef op_put_no_rnd
/**
 * Exported 8x8 "put" helper with the (dst, src, stride) signature.
 *
 * Forwards to put_pixels8_8_c() with its last argument fixed at 8.
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int h = 8;

    put_pixels8_8_c(dst, src, stride, h);
}
/**
 * Exported 8x8 "avg" helper with the (dst, src, stride) signature.
 *
 * Forwards to avg_pixels8_8_c() with its last argument fixed at 8.
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int h = 8;

    avg_pixels8_8_c(dst, src, stride, h);
}
/**
 * Exported 16x16 "put" helper with the (dst, src, stride) signature.
 *
 * Forwards to put_pixels16_8_c() with its last argument fixed at 16.
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int h = 16;

    put_pixels16_8_c(dst, src, stride, h);
}
/**
 * Exported 16x16 "avg" helper with the (dst, src, stride) signature.
 *
 * Forwards to avg_pixels16_8_c() with its last argument fixed at 16.
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int h = 16;

    avg_pixels16_8_c(dst, src, stride, h);
}
/*
 * The *_mc00_c names that ff_qpeldsp_init() stores in slot 0 of each table
 * are provided as aliases of the plain 8x8 / 16x16 block functions above.
 * The put_no_rnd_ aliases point at the same functions as the put_ aliases.
 */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/**
 * Externally visible entry point for put_pixels8_l2_8().
 *
 * All seven arguments are handed through unchanged, in the same order.
 */
void ff_put_pixels8_l2_8(uint8_t *dst,
                         const uint8_t *src1, const uint8_t *src2,
                         int dst_stride,
                         int src_stride1, int src_stride2,
                         int h)
{
    put_pixels8_l2_8(dst,
                     src1, src2,
                     dst_stride,
                     src_stride1, src_stride2,
                     h);
}
/**
 * Populate a quarter-pel DSP context with the C implementations.
 *
 * For each of the put_qpel, put_no_rnd_qpel and avg_qpel tables, row 0
 * receives the qpel16 functions and row 1 the qpel8 functions. Within a row
 * the function named mcXY is stored in slot X + 4 * Y. When ARCH_X86 is
 * nonzero, ff_qpeldsp_init_x86() is called afterwards on the same context.
 *
 * @param c context whose function tables are filled in
 */
av_cold void ff_qpeldsp_init(QpelDSPContext *c)
{
/* Assign all 16 mcXY functions of one family (PFX) and block size (NUM) to
 * row IDX of the matching table. Wrapped in do/while (0) so that every use
 * expands to exactly one statement. */
#define dspfunc(PFX, IDX, NUM)                                   \
    do {                                                         \
        c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c;  \
        c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c;  \
        c->PFX ## _pixels_tab[IDX][2]  = PFX ## NUM ## _mc20_c;  \
        c->PFX ## _pixels_tab[IDX][3]  = PFX ## NUM ## _mc30_c;  \
        c->PFX ## _pixels_tab[IDX][4]  = PFX ## NUM ## _mc01_c;  \
        c->PFX ## _pixels_tab[IDX][5]  = PFX ## NUM ## _mc11_c;  \
        c->PFX ## _pixels_tab[IDX][6]  = PFX ## NUM ## _mc21_c;  \
        c->PFX ## _pixels_tab[IDX][7]  = PFX ## NUM ## _mc31_c;  \
        c->PFX ## _pixels_tab[IDX][8]  = PFX ## NUM ## _mc02_c;  \
        c->PFX ## _pixels_tab[IDX][9]  = PFX ## NUM ## _mc12_c;  \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c;  \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c;  \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c;  \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c;  \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c;  \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c;  \
    } while (0)

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_qpel, 1, 8);

    dspfunc(put_no_rnd_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 0, 16);
    dspfunc(avg_qpel, 1, 8);

/* The helper is only meaningful inside this function. */
#undef dspfunc

    if (ARCH_X86)
        ff_qpeldsp_init_x86(c);
}
/*
* quarterpel DSP functions
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* quarterpel DSP functions
*/
#ifndef AVCODEC_QPELDSP_H
#define AVCODEC_QPELDSP_H
#include <stddef.h>
#include <stdint.h>
/* Fixed-size put/avg block helpers taking (dst, src, stride). */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
/* Two-source put helper: one destination stride, one stride per source,
 * and a count h. */
void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
int dst_stride, int src_stride1, int src_stride2,
int h);
/*
 * Declare the three prototypes belonging to one "old" quarter-pel function:
 * ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>. All of them share
 * the (dst, src, stride) parameter list.
 */
#define DEF_OLD_QPEL(name) \
void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride); \
void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride); \
void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \
uint8_t *src /* align 1 */, ptrdiff_t stride);
/* Old variants are declared for mc11, mc31, mc12, mc32, mc13 and mc33,
 * first for qpel16 and then for qpel8. */
DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
/**
 * Pointer type of the functions stored in the QpelDSPContext tables:
 * destination, source and a single stride.
 */
typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
uint8_t *src /* align 1 */, ptrdiff_t stride);
/**
 * quarterpel DSP context
 *
 * Every table is indexed as [row][slot]. ff_qpeldsp_init() stores the qpel16
 * functions in row 0 and the qpel8 functions in row 1; the function named
 * mcXY goes into slot X + 4 * Y.
 */
typedef struct QpelDSPContext {
/* put family */
qpel_mc_func put_qpel_pixels_tab[2][16];
/* avg family */
qpel_mc_func avg_qpel_pixels_tab[2][16];
/* put family, no-rounding variant */
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
} QpelDSPContext;
/* Fill all three tables of c with the C implementations; when ARCH_X86 is
 * nonzero, ff_qpeldsp_init_x86() is then called on the same context. */
void ff_qpeldsp_init(QpelDSPContext *c);
/* x86-specific initializer, invoked from ff_qpeldsp_init(). */
void ff_qpeldsp_init_x86(QpelDSPContext *c);
#endif /* AVCODEC_QPELDSP_H */
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "golomb.h" #include "golomb.h"
#include "internal.h" #include "internal.h"
#include "mathops.h" #include "mathops.h"
#include "qpeldsp.h"
#include "rectangle.h" #include "rectangle.h"
#include "thread.h" #include "thread.h"
......
...@@ -27,8 +27,8 @@ ...@@ -27,8 +27,8 @@
#ifndef AVCODEC_RV34DSP_H #ifndef AVCODEC_RV34DSP_H
#define AVCODEC_RV34DSP_H #define AVCODEC_RV34DSP_H
#include "dsputil.h"
#include "h264chroma.h" #include "h264chroma.h"
#include "qpeldsp.h"
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/, typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
uint8_t *src1/*align width (8 or 16)*/, uint8_t *src1/*align width (8 or 16)*/,
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "mpegvideo.h" #include "mpegvideo.h"
#include "h263.h" #include "h263.h"
#include "h264chroma.h" #include "h264chroma.h"
#include "qpeldsp.h"
#include "vc1.h" #include "vc1.h"
#include "vc1data.h" #include "vc1data.h"
#include "vc1acdata.h" #include "vc1acdata.h"
...@@ -5603,6 +5604,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) ...@@ -5603,6 +5604,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
if (ff_vc1_init_common(v) < 0) if (ff_vc1_init_common(v) < 0)
return -1; return -1;
ff_h264chroma_init(&v->h264chroma, 8); ff_h264chroma_init(&v->h264chroma, 8);
ff_qpeldsp_init(&s->qdsp);
ff_vc1dsp_init(&v->vc1dsp); ff_vc1dsp_init(&v->vc1dsp);
if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) { if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
...@@ -5971,8 +5973,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ...@@ -5971,8 +5973,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
s->current_picture_ptr->f->repeat_pict = v->rptfrm * 2; s->current_picture_ptr->f->repeat_pict = v->rptfrm * 2;
} }
s->me.qpel_put = s->dsp.put_qpel_pixels_tab; s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab; s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
if (avctx->hwaccel) { if (avctx->hwaccel) {
if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0) if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0)
......
...@@ -26,8 +26,8 @@ ...@@ -26,8 +26,8 @@
*/ */
#include "libavutil/common.h" #include "libavutil/common.h"
#include "dsputil.h"
#include "h264chroma.h" #include "h264chroma.h"
#include "qpeldsp.h"
#include "vc1dsp.h" #include "vc1dsp.h"
/* Apply overlap transform to horizontal edge */ /* Apply overlap transform to horizontal edge */
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include <stdint.h> #include <stdint.h>
#include "dsputil.h" #include "qpeldsp.h"
typedef struct WMV2DSPContext { typedef struct WMV2DSPContext {
void (*idct_add)(uint8_t *dest, int line_size, int16_t *block); void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
......
...@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o ...@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
...@@ -44,13 +45,13 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o ...@@ -44,13 +45,13 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/fpel_mmx.o \
x86/idct_mmx_xvid.o \ x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \ x86/idct_sse2_xvid.o \
x86/simple_idct.o x86/simple_idct.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
MMX-OBJS-$(CONFIG_QPELDSP) += x86/fpel_mmx.o
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
...@@ -61,10 +62,7 @@ YASM-OBJS += x86/deinterlace.o \ ...@@ -61,10 +62,7 @@ YASM-OBJS += x86/deinterlace.o \
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \ YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o
x86/fpel.o \
x86/mpeg4qpel.o \
x86/qpel.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o
...@@ -86,6 +84,9 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ ...@@ -86,6 +84,9 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
x86/hpeldsp.o x86/hpeldsp.o
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
x86/fpel.o \
x86/qpel.o
YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o
YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
......
...@@ -23,55 +23,11 @@ ...@@ -23,55 +23,11 @@
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/pixels.h"
#include "libavcodec/simple_idct.h" #include "libavcodec/simple_idct.h"
#include "libavcodec/version.h" #include "libavcodec/version.h"
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "fpel.h"
#include "idct_xvid.h" #include "idct_xvid.h"
void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
uint8_t *src2, int dstStride,
int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride,
int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride,
int h);
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
int order); int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
...@@ -89,418 +45,6 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, ...@@ -89,418 +45,6 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len); int32_t min, int32_t max, unsigned int len);
#if HAVE_YASM
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[8]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
stride, 8); \
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[8]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
stride, 8); \
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
stride, 8); \
} \
\
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[8]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
8, stride); \
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
stride, stride); \
} \
\
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[8]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
8, stride); \
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
stride, 8); \
} \
\
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half + 64; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
8, stride, 9); \
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
stride, 8); \
} \
\
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[8 + 9]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
stride, 9); \
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[9]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
stride, 9); \
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
stride, 8); \
} \
\
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[32]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
stride, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
stride, 16); \
} \
\
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
stride, stride, 16);\
} \
\
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[32]; \
uint8_t *const half = (uint8_t*) temp; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
stride, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[32]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
stride); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
stride, 16); \
} \
\
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
stride, stride); \
} \
\
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t temp[32]; \
uint8_t *const half = (uint8_t *) temp; \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
stride); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[16 * 2 + 17 * 2]; \
uint8_t *const halfH = (uint8_t *) half + 256; \
uint8_t *const halfHV = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
16, 16); \
ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[17 * 2]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
stride, 17); \
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
stride, 16); \
} \
\
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[17 * 2]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
stride, 17); \
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
ptrdiff_t stride) \
{ \
uint64_t half[17 * 2]; \
uint8_t *const halfH = (uint8_t *) half; \
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
stride, 17); \
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
stride, 16); \
}
/* Emit the quarter-pel wrappers three times; the arguments are
 * (OPNAME, RND, MMX) as pasted into the names in the macro body above:
 * put_ and avg_ both use the "_" intermediate primitives, put_no_rnd_
 * uses the "_no_rnd_" ones. */
QPEL_OP(put_, _, mmxext)
QPEL_OP(avg_, _, mmxext)
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
#endif /* HAVE_YASM */
/*
 * Fill one 16-entry row of c-><OP>_pixels_tab with the functions named
 * <NS><OP><BLK>_mcXY_<ISA>.
 *
 * The function for mcXY is stored in slot X + 4 * Y, so every group of four
 * consecutive slots shares Y and steps X through 0..3.  NS may be left
 * empty.  A pointer named `c` must be in scope where the macro is used.
 */
#define SET_QPEL_FUNCS(OP, ROW, BLK, ISA, NS) \
    do { \
        /* Y = 0 */ \
        c->OP ## _pixels_tab[ROW][ 0] = NS ## OP ## BLK ## _mc00_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 1] = NS ## OP ## BLK ## _mc10_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 2] = NS ## OP ## BLK ## _mc20_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 3] = NS ## OP ## BLK ## _mc30_ ## ISA; \
        /* Y = 1 */ \
        c->OP ## _pixels_tab[ROW][ 4] = NS ## OP ## BLK ## _mc01_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 5] = NS ## OP ## BLK ## _mc11_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 6] = NS ## OP ## BLK ## _mc21_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 7] = NS ## OP ## BLK ## _mc31_ ## ISA; \
        /* Y = 2 */ \
        c->OP ## _pixels_tab[ROW][ 8] = NS ## OP ## BLK ## _mc02_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 9] = NS ## OP ## BLK ## _mc12_ ## ISA; \
        c->OP ## _pixels_tab[ROW][10] = NS ## OP ## BLK ## _mc22_ ## ISA; \
        c->OP ## _pixels_tab[ROW][11] = NS ## OP ## BLK ## _mc32_ ## ISA; \
        /* Y = 3 */ \
        c->OP ## _pixels_tab[ROW][12] = NS ## OP ## BLK ## _mc03_ ## ISA; \
        c->OP ## _pixels_tab[ROW][13] = NS ## OP ## BLK ## _mc13_ ## ISA; \
        c->OP ## _pixels_tab[ROW][14] = NS ## OP ## BLK ## _mc23_ ## ISA; \
        c->OP ## _pixels_tab[ROW][15] = NS ## OP ## BLK ## _mc33_ ## ISA; \
    } while (0)
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth) int cpu_flags, unsigned high_bit_depth)
{ {
...@@ -550,14 +94,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, ...@@ -550,14 +94,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_MMXEXT_INLINE */
#if HAVE_MMXEXT_EXTERNAL #if HAVE_MMXEXT_EXTERNAL
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
#endif /* HAVE_MMXEXT_EXTERNAL */ #endif /* HAVE_MMXEXT_EXTERNAL */
} }
......
;****************************************************************************** ;******************************************************************************
;* mpeg4 qpel ;* quarterpel DSP functions
;*
;* Copyright (c) 2008 Loren Merritt ;* Copyright (c) 2008 Loren Merritt
;* ;*
;* This file is part of Libav. ;* This file is part of Libav.
......
/*
* quarterpel DSP functions
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/pixels.h"
#include "libavcodec/qpeldsp.h"
#include "fpel.h"
/*
 * Prototypes for the MMXEXT primitives called by the QPEL_OP-generated
 * wrappers in this file.  They are only declared here, not defined.
 * NOTE(review): presumably they are implemented in the accompanying
 * assembly source -- confirm.
 */

/* pixels{8,16}_l2: take two sources but only a stride for dst and src1.
 * The callers in this file pass packed scratch buffers (row pitch 8 or 16)
 * as src2; presumably that pitch is implied by the block width -- confirm. */
void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
uint8_t *src2, int dstStride,
int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int dstStride, int src1Stride, int h);
/* Horizontal lowpass: the row count h is explicit; the wrappers pass
 * 8/16 rows, or 9/17 rows when a vertical pass follows. */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride,
int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride,
int h);
/* Vertical lowpass: no height parameter, only the two strides. */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
int dstStride, int srcStride);
/* QPEL_OP(put_no_rnd_, ...) expands its mc00 wrappers to calls of
 * ff_put_no_rnd_pixels{8,16}_mmxext; route those names to the plain put
 * functions.  NOTE(review): presumably valid because a straight copy
 * involves no rounding -- confirm. */
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
#if HAVE_YASM
/* NOTE(review): CALL_2X_PIXELS is not defined in this file (presumably it
 * comes from libavcodec/pixels.h).  These invocations appear to build the
 * 16-wide ff_{avg,put}_pixels16_mmxext called by the qpel16_mc00 wrappers
 * out of the 8-wide functions -- confirm against the macro definition. */
CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
/*
 * QPEL_OP(OPNAME, RND, MMX) defines 32 static wrappers named
 *     OPNAME qpel{8,16}_mcXY_MMX(dst, src, stride),   X, Y in 0..3
 * on top of the ff_*_MMX primitives declared earlier in this file.
 *
 *   OPNAME  prefix of the generated names; it also selects the primitive
 *           that performs the final write to dst.
 *   RND     pasted right after "ff_put" to select the primitive used for
 *           intermediate results kept in scratch buffers ("_" or
 *           "_no_rnd_" in the instantiations below).
 *   MMX     suffix of both the generated and the called names.
 *
 * How X (horizontal pass) and Y (vertical pass) shape a wrapper.
 * NOTE(review): XY presumably is the quarter-sample offset in x and y --
 * confirm with the users of the function tables.
 *   0  the pass is skipped
 *   2  the lowpass output of the pass is used as is
 *   1  the lowpass output is combined (pixels*_l2) with the input of the pass
 *   3  like 1, but the input is taken one pixel to the right (horizontal)
 *      or one row further down (vertical)
 *
 * Scratch buffers are uint64_t arrays, i.e. 8 bytes per element: one
 * element per row for 8-wide blocks, two per row for 16-wide blocks.  When
 * a vertical pass follows, the horizontal pass is run for 9 (17) rows into
 * halfH; where both halfH and halfHV are needed, halfHV occupies the first
 * 8 (16) rows of the array and halfH the 9 (17) rows after it.
 */
#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
                                        stride, 8); \
} \
\
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
                                                   stride, stride); \
} \
\
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride, \
                                        stride, 8); \
} \
\
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH  = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    /* Only the 9-row halfH area is used here, as in qpel8_mc22. */ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
                                        8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    /* Only the 9-row halfH area is used here, as in qpel8_mc22. */ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
\
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
                                                    stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
                                         stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
\
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
                                                    stride, stride); \
} \
\
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + stride, half, \
                                         stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH  = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
}
/* Instantiate the wrappers three times.  The first argument (OPNAME) is the
 * name prefix and selects the primitive that writes dst; the second (RND) is
 * pasted after "ff_put" for the intermediate passes:
 *   put_        with "_"         -> put_qpel{8,16}_mcXY_mmxext
 *   avg_        with "_"         -> avg_qpel{8,16}_mcXY_mmxext
 *   put_no_rnd_ with "_no_rnd_"  -> put_no_rnd_qpel{8,16}_mcXY_mmxext */
QPEL_OP(put_, _, mmxext)
QPEL_OP(avg_, _, mmxext)
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
#endif /* HAVE_YASM */
/*
 * Fill one 16-entry row of c-><OP>_pixels_tab with the functions named
 * <NS><OP><BLK>_mcXY_<ISA>.
 *
 * The function for mcXY is stored in slot X + 4 * Y, so every group of four
 * consecutive slots shares Y and steps X through 0..3.  NS may be left
 * empty.  A pointer named `c` must be in scope where the macro is used.
 */
#define SET_QPEL_FUNCS(OP, ROW, BLK, ISA, NS) \
    do { \
        /* Y = 0 */ \
        c->OP ## _pixels_tab[ROW][ 0] = NS ## OP ## BLK ## _mc00_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 1] = NS ## OP ## BLK ## _mc10_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 2] = NS ## OP ## BLK ## _mc20_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 3] = NS ## OP ## BLK ## _mc30_ ## ISA; \
        /* Y = 1 */ \
        c->OP ## _pixels_tab[ROW][ 4] = NS ## OP ## BLK ## _mc01_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 5] = NS ## OP ## BLK ## _mc11_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 6] = NS ## OP ## BLK ## _mc21_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 7] = NS ## OP ## BLK ## _mc31_ ## ISA; \
        /* Y = 2 */ \
        c->OP ## _pixels_tab[ROW][ 8] = NS ## OP ## BLK ## _mc02_ ## ISA; \
        c->OP ## _pixels_tab[ROW][ 9] = NS ## OP ## BLK ## _mc12_ ## ISA; \
        c->OP ## _pixels_tab[ROW][10] = NS ## OP ## BLK ## _mc22_ ## ISA; \
        c->OP ## _pixels_tab[ROW][11] = NS ## OP ## BLK ## _mc32_ ## ISA; \
        /* Y = 3 */ \
        c->OP ## _pixels_tab[ROW][12] = NS ## OP ## BLK ## _mc03_ ## ISA; \
        c->OP ## _pixels_tab[ROW][13] = NS ## OP ## BLK ## _mc13_ ## ISA; \
        c->OP ## _pixels_tab[ROW][14] = NS ## OP ## BLK ## _mc23_ ## ISA; \
        c->OP ## _pixels_tab[ROW][15] = NS ## OP ## BLK ## _mc33_ ## ISA; \
    } while (0)
/**
 * Install the MMXEXT quarter-pel wrappers into a QpelDSPContext.
 *
 * The tables are only written when the runtime CPU flags pass X86_MMXEXT()
 * and the build has HAVE_MMXEXT_EXTERNAL set; otherwise c is left untouched.
 *
 * @param c context whose put_qpel, put_no_rnd_qpel and avg_qpel tables
 *          are filled
 */
av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!X86_MMXEXT(flags)) {
        return;
    }

#if HAVE_MMXEXT_EXTERNAL
    /* Row 0 of each table receives the 16-wide functions, row 1 the 8-wide
     * ones; the three tables are independent of each other. */
    SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
    SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
    SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
    SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
    SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
    SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
#endif /* HAVE_MMXEXT_EXTERNAL */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment