Commit 8c53d39e authored by Ronald S. Bultje, committed by Luca Barbato

lavc: introduce VideoDSPContext

Move some functions from dsputil. The idea is that videodsp contains
functions that are useful for a large and varied set of video decoders.
Currently, it contains emulated_edge_mc() and prefetch().
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
parent a925f723
...@@ -1328,6 +1328,7 @@ CONFIG_EXTRA=" ...@@ -1328,6 +1328,7 @@ CONFIG_EXTRA="
rangecoder rangecoder
rtpdec rtpdec
sinewin sinewin
videodsp
vp3dsp vp3dsp
" "
...@@ -1544,6 +1545,7 @@ mpeg4_decoder_select="h263_decoder mpeg4video_parser" ...@@ -1544,6 +1545,7 @@ mpeg4_decoder_select="h263_decoder mpeg4video_parser"
mpeg4_encoder_select="h263_encoder" mpeg4_encoder_select="h263_encoder"
mpeg4_vaapi_hwaccel_select="vaapi mpeg4_decoder" mpeg4_vaapi_hwaccel_select="vaapi mpeg4_decoder"
mpeg4_vdpau_decoder_select="vdpau mpeg4_decoder" mpeg4_vdpau_decoder_select="vdpau mpeg4_decoder"
mpegvideo_select="videodsp"
msmpeg4v1_decoder_select="h263_decoder" msmpeg4v1_decoder_select="h263_decoder"
msmpeg4v1_encoder_select="h263_encoder" msmpeg4v1_encoder_select="h263_encoder"
msmpeg4v2_decoder_select="h263_decoder" msmpeg4v2_decoder_select="h263_decoder"
...@@ -1589,12 +1591,12 @@ vc1_vdpau_decoder_select="vdpau vc1_decoder" ...@@ -1589,12 +1591,12 @@ vc1_vdpau_decoder_select="vdpau vc1_decoder"
vc1image_decoder_select="vc1_decoder" vc1image_decoder_select="vc1_decoder"
vorbis_decoder_select="mdct" vorbis_decoder_select="mdct"
vorbis_encoder_select="mdct" vorbis_encoder_select="mdct"
vp3_decoder_select="vp3dsp" vp3_decoder_select="vp3dsp videodsp"
vp5_decoder_select="vp3dsp" vp5_decoder_select="vp3dsp videodsp"
vp6_decoder_select="huffman vp3dsp" vp6_decoder_select="huffman vp3dsp videodsp"
vp6a_decoder_select="vp6_decoder" vp6a_decoder_select="vp6_decoder"
vp6f_decoder_select="vp6_decoder" vp6f_decoder_select="vp6_decoder"
vp8_decoder_select="h264pred h264qpel" vp8_decoder_select="h264pred videodsp"
wmapro_decoder_select="mdct sinewin" wmapro_decoder_select="mdct sinewin"
wmav1_decoder_select="mdct sinewin" wmav1_decoder_select="mdct sinewin"
wmav1_encoder_select="mdct sinewin" wmav1_encoder_select="mdct sinewin"
......
...@@ -67,6 +67,7 @@ OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) ...@@ -67,6 +67,7 @@ OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes)
OBJS-$(CONFIG_SINEWIN) += sinewin.o OBJS-$(CONFIG_SINEWIN) += sinewin.o
OBJS-$(CONFIG_VAAPI) += vaapi.o OBJS-$(CONFIG_VAAPI) += vaapi.o
OBJS-$(CONFIG_VDPAU) += vdpau.o OBJS-$(CONFIG_VDPAU) += vdpau.o
OBJS-$(CONFIG_VIDEODSP) += videodsp.o
OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
# decoders/encoders/hardware accelerators # decoders/encoders/hardware accelerators
......
...@@ -30,6 +30,8 @@ OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o ...@@ -30,6 +30,8 @@ OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \ OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
arm/rv40dsp_init_arm.o \ arm/rv40dsp_init_arm.o \
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o \
OBJS += arm/dsputil_init_arm.o \ OBJS += arm/dsputil_init_arm.o \
arm/dsputil_arm.o \ arm/dsputil_arm.o \
arm/fft_init_arm.o \ arm/fft_init_arm.o \
...@@ -41,6 +43,9 @@ OBJS += arm/dsputil_init_arm.o \ ...@@ -41,6 +43,9 @@ OBJS += arm/dsputil_init_arm.o \
ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \ ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \
arm/mpegvideo_armv5te_s.o \ arm/mpegvideo_armv5te_s.o \
ARMV5TE-OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_armv5te.o \
arm/videodsp_armv5te.o \
ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \ ARMV5TE-OBJS += arm/dsputil_init_armv5te.o \
arm/simple_idct_armv5te.o \ arm/simple_idct_armv5te.o \
......
...@@ -22,15 +22,7 @@ ...@@ -22,15 +22,7 @@
#include "config.h" #include "config.h"
#include "libavutil/arm/asm.S" #include "libavutil/arm/asm.S"
#if HAVE_ARMV5TE_EXTERNAL #if !HAVE_ARMV5TE_EXTERNAL
@ Row prefetch loop: issues one pld per iteration.
@ r0 is the address being preloaded, r1 is added to it after every
@ iteration, and r2 is the iteration counter.
@ NOTE(review): presumably r0/r1/r2 carry the first three C arguments
@ (mem, stride, h) - confirm against the C prototype.
function ff_prefetch_arm, export=1
subs r2, r2, #1         @ decrement the counter; sets the flags tested by bne
pld [r0]                @ preload hint for the current address
add r0, r0, r1          @ step to the next row (does not touch the flags)
bne ff_prefetch_arm     @ repeat until the counter has reached zero
bx lr
endfunc
#else
#define pld @ #define pld @
#endif #endif
......
...@@ -25,8 +25,6 @@ void ff_simple_idct_armv5te(DCTELEM *data); ...@@ -25,8 +25,6 @@ void ff_simple_idct_armv5te(DCTELEM *data);
void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
void ff_prefetch_arm(void *mem, int stride, int h);
av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx)
{ {
if (avctx->bits_per_raw_sample <= 8 && if (avctx->bits_per_raw_sample <= 8 &&
...@@ -37,6 +35,4 @@ av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) ...@@ -37,6 +35,4 @@ av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx)
c->idct = ff_simple_idct_armv5te; c->idct = ff_simple_idct_armv5te;
c->idct_permutation_type = FF_NO_IDCT_PERM; c->idct_permutation_type = FF_NO_IDCT_PERM;
} }
c->prefetch = ff_prefetch_arm;
} }
@
@ ARMv5te optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of Libav.
@
@ Libav is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ Libav is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with Libav; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
#include "config.h"
#include "libavutil/arm/asm.S"
#if HAVE_ARMV5TE_EXTERNAL
@ Row prefetch loop: issues one pld per iteration.
@ r0 is the address being preloaded, r1 is added to it after every
@ iteration, and r2 is the iteration counter.
@ NOTE(review): presumably r0/r1/r2 carry the first three C arguments
@ (mem, stride, h) - confirm against the C prototype.
function ff_prefetch_arm, export=1
subs r2, r2, #1         @ decrement the counter; sets the flags tested by bne
pld [r0]                @ preload hint for the current address
add r0, r0, r1          @ step to the next row (does not touch the flags)
bne ff_prefetch_arm     @ repeat until the counter has reached zero
bx lr
endfunc
#endif
/*
* Copyright (C) 2012 Ronald S. Bultje
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/cpu.h"
#include "libavcodec/videodsp.h"
#include "videodsp_arm.h"
/**
 * Hook up the ARM-specific VideoDSPContext routines that the running CPU
 * supports.
 *
 * @param ctx context forwarded to the ARMv5TE initializer
 * @param bpc forwarded unchanged to the ARMv5TE initializer
 */
void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install unless the CPU reports ARMv5TE support. */
    if (!have_armv5te(flags))
        return;

    ff_videodsp_init_armv5te(ctx, bpc);
}
...@@ -392,7 +392,7 @@ static inline void mc_dir_part(AVSContext *h,Picture *pic, ...@@ -392,7 +392,7 @@ static inline void mc_dir_part(AVSContext *h,Picture *pic,
|| full_my < 0-extra_height || full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width || full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){ || full_my + 16/*FIXME*/ > pic_height + extra_height){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->l_stride, h->l_stride, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->l_stride, h->l_stride,
16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + 2 + 2*h->l_stride; src_y= s->edge_emu_buffer + 2 + 2*h->l_stride;
emu=1; emu=1;
...@@ -401,14 +401,14 @@ static inline void mc_dir_part(AVSContext *h,Picture *pic, ...@@ -401,14 +401,14 @@ static inline void mc_dir_part(AVSContext *h,Picture *pic,
qpix_op[luma_xy](dest_y, src_y, h->l_stride); //FIXME try variable height perhaps? qpix_op[luma_xy](dest_y, src_y, h->l_stride); //FIXME try variable height perhaps?
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->c_stride, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->c_stride,
9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
src_cb= s->edge_emu_buffer; src_cb= s->edge_emu_buffer;
} }
chroma_op(dest_cb, src_cb, h->c_stride, chroma_height, mx&7, my&7); chroma_op(dest_cb, src_cb, h->c_stride, chroma_height, mx&7, my&7);
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->c_stride, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->c_stride,
9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
src_cr= s->edge_emu_buffer; src_cr= s->edge_emu_buffer;
} }
......
...@@ -2615,8 +2615,6 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) ...@@ -2615,8 +2615,6 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
add_pixels_clamped_c(block, dest, line_size); add_pixels_clamped_c(block, dest, line_size);
} }
/* No-op with the prefetch callback signature; every argument is ignored. */
static void just_return(void *mem av_unused,
                        int stride av_unused,
                        int h av_unused)
{
}
/* init static data */ /* init static data */
av_cold void ff_dsputil_static_init(void) av_cold void ff_dsputil_static_init(void)
{ {
...@@ -2867,8 +2865,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -2867,8 +2865,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->shrink[2]= ff_shrink44; c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88; c->shrink[3]= ff_shrink88;
c->prefetch= just_return;
memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
...@@ -2905,7 +2901,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -2905,7 +2901,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
#define BIT_DEPTH_FUNCS(depth, dct)\ #define BIT_DEPTH_FUNCS(depth, dct)\
c->get_pixels = FUNCC(get_pixels ## dct , depth);\ c->get_pixels = FUNCC(get_pixels ## dct , depth);\
c->draw_edges = FUNCC(draw_edges , depth);\ c->draw_edges = FUNCC(draw_edges , depth);\
c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
c->clear_block = FUNCC(clear_block ## dct , depth);\ c->clear_block = FUNCC(clear_block ## dct , depth);\
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
......
...@@ -188,15 +188,6 @@ void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); ...@@ -188,15 +188,6 @@ void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
void ff_init_scantable_permutation(uint8_t *idct_permutation, void ff_init_scantable_permutation(uint8_t *idct_permutation,
int idct_permutation_type); int idct_permutation_type);
#define EMULATED_EDGE(depth) \
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
int block_w, int block_h,\
int src_x, int src_y, int w, int h);
EMULATED_EDGE(8)
EMULATED_EDGE(9)
EMULATED_EDGE(10)
/** /**
* DSPContext. * DSPContext.
*/ */
...@@ -215,21 +206,6 @@ typedef struct DSPContext { ...@@ -215,21 +206,6 @@ typedef struct DSPContext {
void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
/**
* Motion estimation with emulated edge values.
* @param buf pointer to destination buffer (unaligned)
* @param src pointer to pixel source (unaligned)
* @param linesize number of bytes between 2 vertically adjacent samples in both src and buf
* @param block_w number of pixels (per row) to copy to buf
* @param block_h number of pixel rows to copy to buf
* @param src_x offset of src to start of row - this may be negative
* @param src_y offset of src to top of image - this may be negative
* @param w width of src in pixels
* @param h height of src in pixels
*/
void (*emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h);
/** /**
* translational global motion compensation. * translational global motion compensation.
*/ */
...@@ -465,8 +441,6 @@ typedef struct DSPContext { ...@@ -465,8 +441,6 @@ typedef struct DSPContext {
#define EDGE_TOP 1 #define EDGE_TOP 1
#define EDGE_BOTTOM 2 #define EDGE_BOTTOM 2
void (*prefetch)(void *mem, int stride, int h);
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
/** /**
......
...@@ -113,85 +113,6 @@ static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, i ...@@ -113,85 +113,6 @@ static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, i
memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
} }
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize,
                               int block_w, int block_h,
                               int src_x, int src_y, int w, int h)
{
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* A block lying completely below or above the picture is moved so that
     * exactly one of its rows overlaps the picture. */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }

    /* Same for a block lying completely right or left of the picture. */
    if (src_x >= w) {
        /* sizeof() yields an unsigned type, so the count multiplied with it
         * must not be negative: subtract a positive distance instead of
         * adding a negative one. */
        src  -= (1 + src_x - w) * sizeof(pixel);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x) * sizeof(pixel);
        src_x = 1 - block_w;
    }

    /* Part of the block, in block coordinates, that is backed by real samples. */
    start_y = FFMAX(0, -src_y);
    start_x = FFMAX(0, -src_x);
    end_y   = FFMIN(block_h, h - src_y);
    end_x   = FFMIN(block_w, w - src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    /* From here on w is the number of samples copied per row. */
    w    = end_x - start_x;
    src += start_y * linesize + start_x * sizeof(pixel);
    buf += start_x * sizeof(pixel);

    //top
    for (y = 0; y < start_y; y++) {
        memcpy(buf, src, w * sizeof(pixel));
        buf += linesize;
    }

    // copy existing part
    for (; y < end_y; y++) {
        memcpy(buf, src, w * sizeof(pixel));
        src += linesize;
        buf += linesize;
    }

    //bottom
    src -= linesize; /* step back onto the last existing row */
    for (; y < block_h; y++) {
        memcpy(buf, src, w * sizeof(pixel));
        buf += linesize;
    }

    /* Rewind buf to the first sample of the first row of the block. */
    buf -= block_h * linesize + start_x * sizeof(pixel);
    while (block_h--) {
        pixel *bufp = (pixel *)buf;

        //left
        for (x = 0; x < start_x; x++) {
            bufp[x] = bufp[start_x];
        }

        //right
        for (x = end_x; x < block_w; x++) {
            bufp[x] = bufp[end_x - 1];
        }
        buf += linesize;
    }
}
#define DCTELEM_FUNCS(dctcoef, suffix) \ #define DCTELEM_FUNCS(dctcoef, suffix) \
static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \ static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \
const uint8_t *_pixels, \ const uint8_t *_pixels, \
......
...@@ -486,11 +486,11 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, ...@@ -486,11 +486,11 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
full_my < 0 - extra_height || full_my < 0 - extra_height ||
full_mx + 16 /*FIXME*/ > pic_width + extra_width || full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
full_my + 16 /*FIXME*/ > pic_height + extra_height) { full_my + 16 /*FIXME*/ > pic_height + extra_height) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
src_y - (2 << pixel_shift) - 2 * h->mb_linesize, src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize, h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
full_my - 2, pic_width, pic_height); full_my - 2, pic_width, pic_height);
src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
emu = 1; emu = 1;
} }
...@@ -505,12 +505,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, ...@@ -505,12 +505,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
if (chroma_idc == 3 /* yuv444 */) { if (chroma_idc == 3 /* yuv444 */) {
src_cb = pic->f.data[1] + offset; src_cb = pic->f.data[1] + offset;
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
src_cb - (2 << pixel_shift) - 2 * h->mb_linesize, src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize, h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, 16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2, full_mx - 2, full_my - 2,
pic_width, pic_height); pic_width, pic_height);
src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
} }
qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps? qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
...@@ -519,12 +519,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, ...@@ -519,12 +519,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
src_cr = pic->f.data[2] + offset; src_cr = pic->f.data[2] + offset;
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
src_cr - (2 << pixel_shift) - 2 * h->mb_linesize, src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize, h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, 16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2, full_mx - 2, full_my - 2,
pic_width, pic_height); pic_width, pic_height);
src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize; src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
} }
qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps? qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
...@@ -546,9 +546,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, ...@@ -546,9 +546,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
(my >> ysh) * h->mb_uvlinesize; (my >> ysh) * h->mb_uvlinesize;
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cb = s->edge_emu_buffer; src_cb = s->edge_emu_buffer;
} }
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
...@@ -556,9 +556,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic, ...@@ -556,9 +556,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7); mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cr = s->edge_emu_buffer; src_cr = s->edge_emu_buffer;
} }
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
...@@ -735,15 +735,15 @@ static av_always_inline void prefetch_motion(H264Context *h, int list, ...@@ -735,15 +735,15 @@ static av_always_inline void prefetch_motion(H264Context *h, int list,
int off = (mx << pixel_shift) + int off = (mx << pixel_shift) +
(my + (s->mb_x & 3) * 4) * h->mb_linesize + (my + (s->mb_x & 3) * 4) * h->mb_linesize +
(64 << pixel_shift); (64 << pixel_shift);
s->dsp.prefetch(src[0] + off, s->linesize, 4); s->vdsp.prefetch(src[0] + off, s->linesize, 4);
if (chroma_idc == 3 /* yuv444 */) { if (chroma_idc == 3 /* yuv444 */) {
s->dsp.prefetch(src[1] + off, s->linesize, 4); s->vdsp.prefetch(src[1] + off, s->linesize, 4);
s->dsp.prefetch(src[2] + off, s->linesize, 4); s->vdsp.prefetch(src[2] + off, s->linesize, 4);
} else { } else {
off = ((mx >> 1) << pixel_shift) + off = ((mx >> 1) << pixel_shift) +
((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize +
(64 << pixel_shift); (64 << pixel_shift);
s->dsp.prefetch(src[1] + off, src[2] - src[1], 2); s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
} }
} }
} }
...@@ -973,6 +973,7 @@ static av_cold void common_init(H264Context *h) ...@@ -973,6 +973,7 @@ static av_cold void common_init(H264Context *h)
/* needed so that IDCT permutation is known early */ /* needed so that IDCT permutation is known early */
ff_dsputil_init(&s->dsp, s->avctx); ff_dsputil_init(&s->dsp, s->avctx);
ff_videodsp_init(&s->vdsp, 8);
memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t)); memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t)); memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
...@@ -2439,6 +2440,7 @@ static int h264_set_parameter_from_sps(H264Context *h) ...@@ -2439,6 +2440,7 @@ static int h264_set_parameter_from_sps(H264Context *h)
h->sps.chroma_format_idc); h->sps.chroma_format_idc);
s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
ff_dsputil_init(&s->dsp, s->avctx); ff_dsputil_init(&s->dsp, s->avctx);
ff_videodsp_init(&s->vdsp, h->sps.bit_depth_luma);
} else { } else {
av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
h->sps.bit_depth_luma); h->sps.bit_depth_luma);
......
...@@ -60,8 +60,8 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) ...@@ -60,8 +60,8 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
dest_cb = s->current_picture.f.data[1] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h; dest_cb = s->current_picture.f.data[1] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
dest_cr = s->current_picture.f.data[2] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h; dest_cr = s->current_picture.f.data[2] + (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
s->dsp.prefetch(dest_y + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT), s->linesize, 4); s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT), s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x & 7) * s->uvlinesize + (64 << PIXEL_SHIFT), dest_cr - dest_cb, 2); s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * s->uvlinesize + (64 << PIXEL_SHIFT), dest_cr - dest_cb, 2);
h->list_counts[mb_xy] = h->list_count; h->list_counts[mb_xy] = h->list_count;
...@@ -292,8 +292,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) ...@@ -292,8 +292,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
for (p = 0; p < plane_count; p++) { for (p = 0; p < plane_count; p++) {
dest[p] = s->current_picture.f.data[p] + dest[p] = s->current_picture.f.data[p] +
((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16; ((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16;
s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT), s->vdsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT),
s->linesize, 4); s->linesize, 4);
} }
h->list_counts[mb_xy] = h->list_count; h->list_counts[mb_xy] = h->list_count;
......
...@@ -175,6 +175,7 @@ const uint8_t *avpriv_mpv_find_start_code(const uint8_t *restrict p, ...@@ -175,6 +175,7 @@ const uint8_t *avpriv_mpv_find_start_code(const uint8_t *restrict p,
av_cold int ff_dct_common_init(MpegEncContext *s) av_cold int ff_dct_common_init(MpegEncContext *s)
{ {
ff_dsputil_init(&s->dsp, s->avctx); ff_dsputil_init(&s->dsp, s->avctx);
ff_videodsp_init(&s->vdsp, 8);
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c; s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c; s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "parser.h" #include "parser.h"
#include "mpeg12data.h" #include "mpeg12data.h"
#include "rl.h" #include "rl.h"
#include "videodsp.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
...@@ -358,6 +359,7 @@ typedef struct MpegEncContext { ...@@ -358,6 +359,7 @@ typedef struct MpegEncContext {
int h263_long_vectors; ///< use horrible h263v1 long vector mode int h263_long_vectors; ///< use horrible h263v1 long vector mode
DSPContext dsp; ///< pointers for accelerated dsp functions DSPContext dsp; ///< pointers for accelerated dsp functions
VideoDSPContext vdsp;
int f_code; ///< forward MV resolution int f_code; ///< forward MV resolution
int b_code; ///< backward MV resolution for B Frames (mpeg4) int b_code; ///< backward MV resolution for B Frames (mpeg4)
int16_t (*p_mv_table_base)[2]; int16_t (*p_mv_table_base)[2];
......
...@@ -1782,16 +1782,16 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, ...@@ -1782,16 +1782,16 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) { if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
uint8_t *ebuf = s->edge_emu_buffer + 32; uint8_t *ebuf = s->edge_emu_buffer + 32;
s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16, s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
mb_y * 16, s->width, s->height); mb_y * 16, s->width, s->height);
ptr_y = ebuf; ptr_y = ebuf;
s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8, s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
mb_block_height, mb_x * 8, mb_y * 8, mb_block_height, mb_x * 8, mb_y * 8,
s->width >> 1, s->height >> 1); s->width >> 1, s->height >> 1);
ptr_cb = ebuf + 18 * wrap_y; ptr_cb = ebuf + 18 * wrap_y;
s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8, s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
mb_block_height, mb_x * 8, mb_y * 8, mb_block_height, mb_x * 8, mb_y * 8,
s->width >> 1, s->height >> 1); s->width >> 1, s->height >> 1);
ptr_cr = ebuf + 18 * wrap_y + 8; ptr_cr = ebuf + 18 * wrap_y + 8;
} }
......
...@@ -59,7 +59,7 @@ static void gmc1_motion(MpegEncContext *s, ...@@ -59,7 +59,7 @@ static void gmc1_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(s->flags&CODEC_FLAG_EMU_EDGE){
if( (unsigned)src_x >= FFMAX(s->h_edge_pos - 17, 0) if( (unsigned)src_x >= FFMAX(s->h_edge_pos - 17, 0)
|| (unsigned)src_y >= FFMAX(s->v_edge_pos - 17, 0)){ || (unsigned)src_y >= FFMAX(s->v_edge_pos - 17, 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos); s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
} }
} }
...@@ -98,7 +98,7 @@ static void gmc1_motion(MpegEncContext *s, ...@@ -98,7 +98,7 @@ static void gmc1_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(s->flags&CODEC_FLAG_EMU_EDGE){
if( (unsigned)src_x >= FFMAX((s->h_edge_pos>>1) - 9, 0) if( (unsigned)src_x >= FFMAX((s->h_edge_pos>>1) - 9, 0)
|| (unsigned)src_y >= FFMAX((s->v_edge_pos>>1) - 9, 0)){ || (unsigned)src_y >= FFMAX((s->v_edge_pos>>1) - 9, 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
emu=1; emu=1;
} }
...@@ -107,7 +107,7 @@ static void gmc1_motion(MpegEncContext *s, ...@@ -107,7 +107,7 @@ static void gmc1_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset; ptr = ref_picture[2] + offset;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
} }
s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
...@@ -195,7 +195,7 @@ static inline int hpel_motion(MpegEncContext *s, ...@@ -195,7 +195,7 @@ static inline int hpel_motion(MpegEncContext *s,
if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){ if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&1) - 8, 0) if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&1) - 8, 0)
|| (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&1) - 8, 0)){ || (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&1) - 8, 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, 9, 9, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, 9, 9,
src_x, src_y, s->h_edge_pos, s->v_edge_pos); src_x, src_y, s->h_edge_pos, s->v_edge_pos);
src= s->edge_emu_buffer; src= s->edge_emu_buffer;
emu=1; emu=1;
...@@ -285,19 +285,19 @@ if(s->quarter_sample) ...@@ -285,19 +285,19 @@ if(s->quarter_sample)
"MPEG motion vector out of boundary (%d %d)\n", src_x, src_y); "MPEG motion vector out of boundary (%d %d)\n", src_x, src_y);
return; return;
} }
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize,
17, 17+field_based, 17, 17+field_based,
src_x, src_y<<field_based, src_x, src_y<<field_based,
s->h_edge_pos, s->v_edge_pos); s->h_edge_pos, s->v_edge_pos);
ptr_y = s->edge_emu_buffer; ptr_y = s->edge_emu_buffer;
if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize; uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
s->dsp.emulated_edge_mc(uvbuf , s->vdsp.emulated_edge_mc(uvbuf ,
ptr_cb, s->uvlinesize, ptr_cb, s->uvlinesize,
9, 9+field_based, 9, 9+field_based,
uvsrc_x, uvsrc_y<<field_based, uvsrc_x, uvsrc_y<<field_based,
s->h_edge_pos>>1, s->v_edge_pos>>1); s->h_edge_pos>>1, s->v_edge_pos>>1);
s->dsp.emulated_edge_mc(uvbuf+16, s->vdsp.emulated_edge_mc(uvbuf+16,
ptr_cr, s->uvlinesize, ptr_cr, s->uvlinesize,
9, 9+field_based, 9, 9+field_based,
uvsrc_x, uvsrc_y<<field_based, uvsrc_x, uvsrc_y<<field_based,
...@@ -498,17 +498,17 @@ static inline void qpel_motion(MpegEncContext *s, ...@@ -498,17 +498,17 @@ static inline void qpel_motion(MpegEncContext *s,
if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&3) - 16, 0) if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&3) - 16, 0)
|| (unsigned)src_y > FFMAX( v_edge_pos - (motion_y&3) - h , 0)){ || (unsigned)src_y > FFMAX( v_edge_pos - (motion_y&3) - h , 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize,
17, 17+field_based, src_x, src_y<<field_based, 17, 17+field_based, src_x, src_y<<field_based,
s->h_edge_pos, s->v_edge_pos); s->h_edge_pos, s->v_edge_pos);
ptr_y= s->edge_emu_buffer; ptr_y= s->edge_emu_buffer;
if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize; uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
s->dsp.emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, s->vdsp.emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize,
9, 9 + field_based, 9, 9 + field_based,
uvsrc_x, uvsrc_y<<field_based, uvsrc_x, uvsrc_y<<field_based,
s->h_edge_pos>>1, s->v_edge_pos>>1); s->h_edge_pos>>1, s->v_edge_pos>>1);
s->dsp.emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, s->vdsp.emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize,
9, 9 + field_based, 9, 9 + field_based,
uvsrc_x, uvsrc_y<<field_based, uvsrc_x, uvsrc_y<<field_based,
s->h_edge_pos>>1, s->v_edge_pos>>1); s->h_edge_pos>>1, s->v_edge_pos>>1);
...@@ -577,7 +577,7 @@ static void chroma_4mv_motion(MpegEncContext *s, ...@@ -577,7 +577,7 @@ static void chroma_4mv_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(s->flags&CODEC_FLAG_EMU_EDGE){
if( (unsigned)src_x > FFMAX((s->h_edge_pos>>1) - (dxy &1) - 8, 0) if( (unsigned)src_x > FFMAX((s->h_edge_pos>>1) - (dxy &1) - 8, 0)
|| (unsigned)src_y > FFMAX((s->v_edge_pos>>1) - (dxy>>1) - 8, 0)){ || (unsigned)src_y > FFMAX((s->v_edge_pos>>1) - (dxy>>1) - 8, 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize,
9, 9, src_x, src_y, 9, 9, src_x, src_y,
s->h_edge_pos>>1, s->v_edge_pos>>1); s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
...@@ -588,7 +588,7 @@ static void chroma_4mv_motion(MpegEncContext *s, ...@@ -588,7 +588,7 @@ static void chroma_4mv_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset; ptr = ref_picture[2] + offset;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize,
9, 9, src_x, src_y, 9, 9, src_x, src_y,
s->h_edge_pos>>1, s->v_edge_pos>>1); s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
...@@ -603,9 +603,9 @@ static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){ ...@@ -603,9 +603,9 @@ static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8; const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y; const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
s->dsp.prefetch(pix[0]+off, s->linesize, 4); s->vdsp.prefetch(pix[0]+off, s->linesize, 4);
off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2); s->vdsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
} }
/** /**
...@@ -757,7 +757,7 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s, ...@@ -757,7 +757,7 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(s->flags&CODEC_FLAG_EMU_EDGE){
if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&3) - 8, 0) if( (unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x&3) - 8, 0)
|| (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&3) - 8, 0)){ || (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y&3) - 8, 0)){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
s->linesize, 9, 9, s->linesize, 9, 9,
src_x, src_y, src_x, src_y,
s->h_edge_pos, s->v_edge_pos); s->h_edge_pos, s->v_edge_pos);
......
OBJS += ppc/dsputil_ppc.o \ OBJS += ppc/dsputil_ppc.o \
ppc/videodsp_ppc.o \
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
......
...@@ -137,21 +137,11 @@ static long check_dcbzl_effect(void) ...@@ -137,21 +137,11 @@ static long check_dcbzl_effect(void)
} }
#endif #endif
/*
 * Issue one `dcbt` instruction on the first byte of each of h lines,
 * starting at mem and stepping by stride bytes between lines.
 *
 * The loop is a do/while, so the body runs before h is tested: callers must
 * pass h >= 1 (h == 0 is not treated as "no lines").
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
register const uint8_t *p = mem;
do {
__asm__ volatile ("dcbt 0,%0" : : "r" (p));
p+= stride;
} while(--h);
}
void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{ {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
// Common optimizations whether AltiVec is available or not // Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
if (!high_bit_depth) { if (!high_bit_depth) {
switch (check_dcbzl_effect()) { switch (check_dcbzl_effect()) {
case 32: case 32:
......
/*
* Copyright (c) 2003-2004 Romain Dolbeau
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/videodsp.h"
/**
 * PowerPC implementation of the VideoDSPContext prefetch hook.
 *
 * Issues one `dcbt` instruction on the first byte of each line.
 *
 * @param mem    address of the first line
 * @param stride distance in bytes between two consecutive lines
 * @param h      number of lines; must be at least 1, because the count is
 *               only tested after a line has been touched
 */
static void prefetch_ppc(uint8_t *mem, ptrdiff_t stride, int h)
{
    register const uint8_t *line = mem;

    for (;;) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (line));
        line += stride;
        if (--h == 0)
            break;
    }
}
/**
 * Install the PowerPC-specific routines into a VideoDSPContext.
 *
 * Only the prefetch hook is replaced; emulated_edge_mc is left untouched.
 *
 * @param ctx context to update
 * @param bpc bits per component; not used by this function
 */
void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc)
{
ctx->prefetch = prefetch_ppc;
}
...@@ -725,12 +725,12 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type, ...@@ -725,12 +725,12 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
uint8_t *uvbuf = s->edge_emu_buffer + 22 * s->linesize; uint8_t *uvbuf = s->edge_emu_buffer + 22 * s->linesize;
srcY -= 2 + 2*s->linesize; srcY -= 2 + 2*s->linesize;
s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, (width<<3)+6, (height<<3)+6, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, (width<<3)+6, (height<<3)+6,
src_x - 2, src_y - 2, s->h_edge_pos, s->v_edge_pos); src_x - 2, src_y - 2, s->h_edge_pos, s->v_edge_pos);
srcY = s->edge_emu_buffer + 2 + 2*s->linesize; srcY = s->edge_emu_buffer + 2 + 2*s->linesize;
s->dsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, (width<<2)+1, (height<<2)+1, s->vdsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, (width<<2)+1, (height<<2)+1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, (width<<2)+1, (height<<2)+1, s->vdsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, (width<<2)+1, (height<<2)+1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
srcU = uvbuf; srcU = uvbuf;
srcV = uvbuf + 16; srcV = uvbuf + 16;
......
...@@ -293,9 +293,9 @@ static inline void svq3_mc_dir_part(MpegEncContext *s, ...@@ -293,9 +293,9 @@ static inline void svq3_mc_dir_part(MpegEncContext *s,
src = pic->f.data[0] + mx + my * s->linesize; src = pic->f.data[0] + mx + my * s->linesize;
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src, s->linesize,
width + 1, height + 1, width + 1, height + 1,
mx, my, s->h_edge_pos, s->v_edge_pos); mx, my, s->h_edge_pos, s->v_edge_pos);
src = s->edge_emu_buffer; src = s->edge_emu_buffer;
} }
if (thirdpel) if (thirdpel)
...@@ -319,10 +319,10 @@ static inline void svq3_mc_dir_part(MpegEncContext *s, ...@@ -319,10 +319,10 @@ static inline void svq3_mc_dir_part(MpegEncContext *s,
src = pic->f.data[i] + mx + my * s->uvlinesize; src = pic->f.data[i] + mx + my * s->uvlinesize;
if (emu) { if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src, s->uvlinesize,
width + 1, height + 1, width + 1, height + 1,
mx, my, (s->h_edge_pos >> 1), mx, my, (s->h_edge_pos >> 1),
s->v_edge_pos >> 1); s->v_edge_pos >> 1);
src = s->edge_emu_buffer; src = s->edge_emu_buffer;
} }
if (thirdpel) if (thirdpel)
......
...@@ -434,15 +434,15 @@ static void vc1_mc_1mv(VC1Context *v, int dir) ...@@ -434,15 +434,15 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize; uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
srcY -= s->mspel * (1 + s->linesize); srcY -= s->mspel * (1 + s->linesize);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
17 + s->mspel * 2, 17 + s->mspel * 2, 17 + s->mspel * 2, 17 + s->mspel * 2,
src_x - s->mspel, src_y - s->mspel, src_x - s->mspel, src_y - s->mspel,
s->h_edge_pos, v_edge_pos); s->h_edge_pos, v_edge_pos);
srcY = s->edge_emu_buffer; srcY = s->edge_emu_buffer;
s->dsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, 8 + 1, 8 + 1, s->vdsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, 8 + 1, 8 + 1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1, s->vdsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
srcU = uvbuf; srcU = uvbuf;
srcV = uvbuf + 16; srcV = uvbuf + 16;
/* if we deal with range reduction we need to scale source blocks */ /* if we deal with range reduction we need to scale source blocks */
...@@ -667,10 +667,10 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir) ...@@ -667,10 +667,10 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir)
|| (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) { || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
srcY -= s->mspel * (1 + (s->linesize << fieldmv)); srcY -= s->mspel * (1 + (s->linesize << fieldmv));
/* check emulate edge stride and offset */ /* check emulate edge stride and offset */
s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
9 + s->mspel * 2, (9 + s->mspel * 2) << fieldmv, 9 + s->mspel * 2, (9 + s->mspel * 2) << fieldmv,
src_x - s->mspel, src_y - (s->mspel << fieldmv), src_x - s->mspel, src_y - (s->mspel << fieldmv),
s->h_edge_pos, v_edge_pos); s->h_edge_pos, v_edge_pos);
srcY = s->edge_emu_buffer; srcY = s->edge_emu_buffer;
/* if we deal with range reduction we need to scale source blocks */ /* if we deal with range reduction we need to scale source blocks */
if (v->rangeredfrm) { if (v->rangeredfrm) {
...@@ -868,12 +868,12 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir) ...@@ -868,12 +868,12 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
|| s->h_edge_pos < 18 || v_edge_pos < 18 || s->h_edge_pos < 18 || v_edge_pos < 18
|| (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9 || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
|| (unsigned)uvsrc_y > (v_edge_pos >> 1) - 9) { || (unsigned)uvsrc_y > (v_edge_pos >> 1) - 9) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer , srcU, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer , srcU, s->uvlinesize,
8 + 1, 8 + 1, uvsrc_x, uvsrc_y, 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
s->h_edge_pos >> 1, v_edge_pos >> 1); s->h_edge_pos >> 1, v_edge_pos >> 1);
s->dsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
8 + 1, 8 + 1, uvsrc_x, uvsrc_y, 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
s->h_edge_pos >> 1, v_edge_pos >> 1); s->h_edge_pos >> 1, v_edge_pos >> 1);
srcU = s->edge_emu_buffer; srcU = s->edge_emu_buffer;
srcV = s->edge_emu_buffer + 16; srcV = s->edge_emu_buffer + 16;
...@@ -973,12 +973,12 @@ static void vc1_mc_4mv_chroma4(VC1Context *v) ...@@ -973,12 +973,12 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
|| s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv) || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
|| (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5 || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
|| (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) { || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcU, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcU, s->uvlinesize,
5, (5 << fieldmv), uvsrc_x, uvsrc_y, 5, (5 << fieldmv), uvsrc_x, uvsrc_y,
s->h_edge_pos >> 1, v_edge_pos); s->h_edge_pos >> 1, v_edge_pos);
s->dsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
5, (5 << fieldmv), uvsrc_x, uvsrc_y, 5, (5 << fieldmv), uvsrc_x, uvsrc_y,
s->h_edge_pos >> 1, v_edge_pos); s->h_edge_pos >> 1, v_edge_pos);
srcU = s->edge_emu_buffer; srcU = s->edge_emu_buffer;
srcV = s->edge_emu_buffer + 16; srcV = s->edge_emu_buffer + 16;
...@@ -1888,15 +1888,15 @@ static void vc1_interp_mc(VC1Context *v) ...@@ -1888,15 +1888,15 @@ static void vc1_interp_mc(VC1Context *v)
uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize; uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
srcY -= s->mspel * (1 + s->linesize); srcY -= s->mspel * (1 + s->linesize);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
17 + s->mspel * 2, 17 + s->mspel * 2, 17 + s->mspel * 2, 17 + s->mspel * 2,
src_x - s->mspel, src_y - s->mspel, src_x - s->mspel, src_y - s->mspel,
s->h_edge_pos, v_edge_pos); s->h_edge_pos, v_edge_pos);
srcY = s->edge_emu_buffer; srcY = s->edge_emu_buffer;
s->dsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, 8 + 1, 8 + 1, s->vdsp.emulated_edge_mc(uvbuf , srcU, s->uvlinesize, 8 + 1, 8 + 1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1, s->vdsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1); uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
srcU = uvbuf; srcU = uvbuf;
srcV = uvbuf + 16; srcV = uvbuf + 16;
/* if we deal with range reduction we need to scale source blocks */ /* if we deal with range reduction we need to scale source blocks */
......
/*
* Copyright (C) 2012 Ronald S. Bultje
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "videodsp.h"
#define BIT_DEPTH 8
#include "videodsp_template.c"
#undef BIT_DEPTH
#define BIT_DEPTH 16
#include "videodsp_template.c"
#undef BIT_DEPTH
/**
 * No-op with the signature of VideoDSPContext.prefetch.
 *
 * All three arguments are ignored and nothing is read or written.
 */
static void just_return(uint8_t *buf, ptrdiff_t stride, int h)
{
}
/**
 * Fill a VideoDSPContext with the generic C routines, then let the
 * architecture-specific initializer (if any) override them.
 *
 * @param ctx context to initialize
 * @param bpc bits per component: values up to 8 select the 8-bit edge
 *            emulation, anything larger selects the 16-bit variant
 */
void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
{
    /* Portable defaults. */
    ctx->emulated_edge_mc = bpc > 8 ? ff_emulated_edge_mc_16
                                    : ff_emulated_edge_mc_8;
    ctx->prefetch         = just_return;

    /* Per-architecture overrides, selected by the ARCH_* constants. */
    if (ARCH_ARM)
        ff_videodsp_init_arm(ctx, bpc);
    if (ARCH_PPC)
        ff_videodsp_init_ppc(ctx, bpc);
    if (ARCH_X86)
        ff_videodsp_init_x86(ctx, bpc);
}
/*
* Copyright (C) 2012 Ronald S. Bultje
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Core video DSP helper functions
*/
#ifndef AVCODEC_VIDEODSP_H
#define AVCODEC_VIDEODSP_H
#include <stddef.h>
#include <stdint.h>
/**
 * Table of video DSP helper routines.
 *
 * Both members are function pointers that are filled in by
 * ff_videodsp_init().
 */
typedef struct VideoDSPContext {
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate
 * the border samples.
 *
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples
 *                 in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the
 *              source buffer
 * @param src_y y coordinate of the top left sample of the block in the
 *              source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void (*emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
ptrdiff_t linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
/**
 * Prefetch memory into cache (if supported by hardware).
 *
 * @param buf    pointer to buffer to prefetch memory from
 * @param stride distance between two lines of buf (in bytes)
 * @param h      number of lines to prefetch
 */
void (*prefetch)(uint8_t *buf, ptrdiff_t stride, int h);
} VideoDSPContext;
void ff_videodsp_init(VideoDSPContext *ctx, int bpc);
/* for internal use only (i.e. called by ff_videodsp_init()) */
void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc);
void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc);
void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc);
#endif /* AVCODEC_VIDEODSP_H */
/*
* Copyright (c) 2002-2004 Michael Niedermayer
* Copyright (C) 2012 Ronald S. Bultje
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "bit_depth_template.c"
static void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
ptrdiff_t linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h)
{
int x, y;
int start_y, start_x, end_y, end_x;
if (src_y >= h) {
src += (h - 1 - src_y) * linesize;
src_y = h - 1;
} else if (src_y <= -block_h) {
src += (1 - block_h - src_y) * linesize;
src_y = 1 - block_h;
}
if (src_x >= w) {
src += (w - 1 - src_x) * sizeof(pixel);
src_x = w - 1;
} else if (src_x <= -block_w) {
src += (1 - block_w - src_x) * sizeof(pixel);
src_x = 1 - block_w;
}
start_y = FFMAX(0, -src_y);
start_x = FFMAX(0, -src_x);
end_y = FFMIN(block_h, h-src_y);
end_x = FFMIN(block_w, w-src_x);
assert(start_y < end_y && block_h);
assert(start_x < end_x && block_w);
w = end_x - start_x;
src += start_y * linesize + start_x * sizeof(pixel);
buf += start_x * sizeof(pixel);
// top
for (y = 0; y < start_y; y++) {
memcpy(buf, src, w * sizeof(pixel));
buf += linesize;
}
// copy existing part
for (; y < end_y; y++) {
memcpy(buf, src, w * sizeof(pixel));
src += linesize;
buf += linesize;
}
// bottom
src -= linesize;
for (; y < block_h; y++) {
memcpy(buf, src, w * sizeof(pixel));
buf += linesize;
}
buf -= block_h * linesize + start_x * sizeof(pixel);
while (block_h--) {
pixel *bufp = (pixel *) buf;
// left
for(x = 0; x < start_x; x++) {
bufp[x] = bufp[start_x];
}
// right
for (x = end_x; x < block_w; x++) {
bufp[x] = bufp[end_x - 1];
}
buf += linesize;
}
}
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "internal.h" #include "internal.h"
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
#include "videodsp.h"
#include "vp3data.h" #include "vp3data.h"
#include "vp3dsp.h" #include "vp3dsp.h"
#include "xiph.h" #include "xiph.h"
...@@ -136,6 +136,7 @@ typedef struct Vp3DecodeContext { ...@@ -136,6 +136,7 @@ typedef struct Vp3DecodeContext {
AVFrame current_frame; AVFrame current_frame;
int keyframe; int keyframe;
DSPContext dsp; DSPContext dsp;
VideoDSPContext vdsp;
VP3DSPContext vp3dsp; VP3DSPContext vp3dsp;
int flipped_image; int flipped_image;
int last_slice_end; int last_slice_end;
...@@ -1543,7 +1544,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) ...@@ -1543,7 +1544,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
uint8_t *temp= s->edge_emu_buffer; uint8_t *temp= s->edge_emu_buffer;
if(stride<0) temp -= 8*stride; if(stride<0) temp -= 8*stride;
s->dsp.emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height); s->vdsp.emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height);
motion_source= temp; motion_source= temp;
} }
} }
...@@ -1677,6 +1678,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) ...@@ -1677,6 +1678,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->pix_fmt = AV_PIX_FMT_YUV420P;
avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_videodsp_init(&s->vdsp, 8);
ff_vp3dsp_init(&s->vp3dsp, avctx->flags); ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
......
...@@ -340,7 +340,7 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src, ...@@ -340,7 +340,7 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
if (x<0 || x+12>=s->plane_width[plane] || if (x<0 || x+12>=s->plane_width[plane] ||
y<0 || y+12>=s->plane_height[plane]) { y<0 || y+12>=s->plane_height[plane]) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer, s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
src + s->block_offset[b] + (dy-2)*stride + (dx-2), src + s->block_offset[b] + (dy-2)*stride + (dx-2),
stride, 12, 12, x, y, stride, 12, 12, x, y,
s->plane_width[plane], s->plane_width[plane],
...@@ -674,6 +674,7 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) ...@@ -674,6 +674,7 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_videodsp_init(&s->vdsp, 8);
ff_vp3dsp_init(&s->vp3dsp, avctx->flags); ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
#include "bytestream.h" #include "bytestream.h"
#include "videodsp.h"
#include "vp3dsp.h" #include "vp3dsp.h"
#include "vp56dsp.h" #include "vp56dsp.h"
...@@ -94,6 +95,7 @@ typedef struct VP56Model { ...@@ -94,6 +95,7 @@ typedef struct VP56Model {
struct vp56_context { struct vp56_context {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp; DSPContext dsp;
VideoDSPContext vdsp;
VP3DSPContext vp3dsp; VP3DSPContext vp3dsp;
VP56DSPContext vp56dsp; VP56DSPContext vp56dsp;
ScanTable scantable; ScanTable scantable;
......
...@@ -1198,9 +1198,9 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, ...@@ -1198,9 +1198,9 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
src += y_off * linesize + x_off; src += y_off * linesize + x_off;
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src = td->edge_emu_buffer + mx_idx + linesize * my_idx; src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
} }
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
...@@ -1248,15 +1248,15 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst ...@@ -1248,15 +1248,15 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx; src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
x_off - mx_idx, y_off - my_idx, width, height); x_off - mx_idx, y_off - my_idx, width, height);
src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx; src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
} else { } else {
...@@ -1315,9 +1315,9 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i ...@@ -1315,9 +1315,9 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i
/* For threading, a ff_thread_await_progress here might be useful, but /* For threading, a ff_thread_await_progress here might be useful, but
* it actually slows down the decoder. Since a bad prefetch doesn't * it actually slows down the decoder. Since a bad prefetch doesn't
* generate bad decoder output, we don't run it here. */ * generate bad decoder output, we don't run it here. */
s->dsp.prefetch(src[0]+off, s->linesize, 4); s->vdsp.prefetch(src[0]+off, s->linesize, 4);
off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
} }
} }
...@@ -1716,8 +1716,8 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, ...@@ -1716,8 +1716,8 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
} }
} }
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
if (!s->mb_layout) if (!s->mb_layout)
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
...@@ -2020,7 +2020,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) ...@@ -2020,7 +2020,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
s->avctx = avctx; s->avctx = avctx;
avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_dsputil_init(&s->dsp, avctx); ff_videodsp_init(&s->vdsp, 8);
ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
ff_vp8dsp_init(&s->vp8dsp); ff_vp8dsp_init(&s->vp8dsp);
......
...@@ -247,7 +247,7 @@ typedef struct VP8Context { ...@@ -247,7 +247,7 @@ typedef struct VP8Context {
*/ */
int num_coeff_partitions; int num_coeff_partitions;
VP56RangeCoder coeff_partition[8]; VP56RangeCoder coeff_partition[8];
DSPContext dsp; VideoDSPContext vdsp;
VP8DSPContext vp8dsp; VP8DSPContext vp8dsp;
H264PredContext hpc; H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3]; vp8_mc_func put_pixels_tab[3][3][3];
......
...@@ -102,7 +102,7 @@ void ff_mspel_motion(MpegEncContext *s, ...@@ -102,7 +102,7 @@ void ff_mspel_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){ if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<1 || src_y<1 || src_x + 17 >= s->h_edge_pos if(src_x<1 || src_y<1 || src_x + 17 >= s->h_edge_pos
|| src_y + h+1 >= v_edge_pos){ || src_y + h+1 >= v_edge_pos){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr - 1 - s->linesize, s->linesize, 19, 19, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr - 1 - s->linesize, s->linesize, 19, 19,
src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos); src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + 1 + s->linesize; ptr= s->edge_emu_buffer + 1 + s->linesize;
emu=1; emu=1;
...@@ -143,7 +143,7 @@ void ff_mspel_motion(MpegEncContext *s, ...@@ -143,7 +143,7 @@ void ff_mspel_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x; offset = (src_y * uvlinesize) + src_x;
ptr = ref_picture[1] + offset; ptr = ref_picture[1] + offset;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
} }
...@@ -151,7 +151,7 @@ void ff_mspel_motion(MpegEncContext *s, ...@@ -151,7 +151,7 @@ void ff_mspel_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset; ptr = ref_picture[2] + offset;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer; ptr= s->edge_emu_buffer;
} }
......
...@@ -19,6 +19,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \ ...@@ -19,6 +19,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
x86/rv40dsp_init.o x86/rv40dsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o
...@@ -60,6 +61,7 @@ YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o ...@@ -60,6 +61,7 @@ YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \ YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
x86/rv40dsp.o x86/rv40dsp.o
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o
YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
......
This diff is collapsed.
...@@ -1635,78 +1635,6 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) ...@@ -1635,78 +1635,6 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
avg_pixels16_xy2_mmx(dst, src, stride, 16); avg_pixels16_xy2_mmx(dst, src, stride, 16);
} }
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
/*
 * Signature shared by the two externally defined edge-emulation cores
 * declared below.
 *
 * NOTE(review): the cores are presumably implemented in assembly, since the
 * declarations sit under HAVE_YASM; their definitions are not visible here.
 */
typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,
x86_reg linesize, x86_reg start_y,
x86_reg end_y, x86_reg block_h,
x86_reg start_x, x86_reg end_x,
x86_reg block_w);
extern emu_edge_core_func ff_emu_edge_core_mmx;
extern emu_edge_core_func ff_emu_edge_core_sse;
/*
 * Common front end for the x86 edge-emulation wrappers.
 *
 * Clamps the block position so that it overlaps the w x h source area by at
 * least one row and one column, works out which part of the block is backed
 * by real samples, and passes the adjusted pointers and bounds to core_fn.
 *
 * buf, src, linesize, block_w, block_h, src_x, src_y, w and h have the same
 * roles as in the per-instruction-set wrappers that call this function;
 * core_fn is the routine that performs the actual copy.
 */
static av_always_inline void emulated_edge_mc(uint8_t *buf, const uint8_t *src,
int linesize,
int block_w, int block_h,
int src_x, int src_y,
int w, int h,
emu_edge_core_func *core_fn)
{
int start_y, start_x, end_y, end_x, src_y_add = 0;
/* Vertical clamp: remember the row correction in src_y_add; it is applied
 * to src further down, together with start_y. */
if (src_y >= h) {
src_y_add = h - 1 - src_y;
src_y = h - 1;
} else if (src_y <= -block_h) {
src_y_add = 1 - block_h - src_y;
src_y = 1 - block_h;
}
/* Horizontal clamp: applied to src immediately. */
if (src_x >= w) {
src += w - 1 - src_x;
src_x = w - 1;
} else if (src_x <= -block_w) {
src += 1 - block_w - src_x;
src_x = 1 - block_w;
}
/* [start_x, end_x) x [start_y, end_y) is the part of the block, in block
 * coordinates, that lies inside the source area. */
start_y = FFMAX(0, -src_y);
start_x = FFMAX(0, -src_x);
end_y = FFMIN(block_h, h-src_y);
end_x = FFMIN(block_w, w-src_x);
assert(start_x < end_x && block_w > 0);
assert(start_y < end_y && block_h > 0);
// fill in the to-be-copied part plus all above/below
src += (src_y_add + start_y) * linesize + start_x;
buf += start_x;
core_fn(buf, src, linesize, start_y, end_y,
block_h, start_x, end_x, block_w);
}
#if ARCH_X86_32
/*
 * Edge emulation bound to the MMX core routine; only built for 32-bit x86.
 * All arguments are forwarded unchanged to emulated_edge_mc().
 */
static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src,
                                             int linesize,
                                             int block_w, int block_h,
                                             int src_x, int src_y, int w, int h)
{
    emu_edge_core_func *const core = ff_emu_edge_core_mmx;

    emulated_edge_mc(buf, src, linesize, block_w, block_h,
                     src_x, src_y, w, h, core);
}
#endif
static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
int linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h)
{
emulated_edge_mc(buf, src, linesize, block_w, block_h, src_x, src_y,
w, h, &ff_emu_edge_core_sse);
}
#endif /* HAVE_YASM */
#if HAVE_INLINE_ASM
static void gmc_mmx(uint8_t *dst, uint8_t *src, static void gmc_mmx(uint8_t *dst, uint8_t *src,
int stride, int h, int ox, int oy, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int dxx, int dxy, int dyx, int dyy,
...@@ -1822,21 +1750,6 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, ...@@ -1822,21 +1750,6 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
src += 4 - h * stride; src += 4 - h * stride;
} }
} }
/*
 * PREFETCH(name, op) defines
 *     static void name(void *mem, int stride, int h)
 * which issues the instruction `op` (pasted into the asm string) on a byte at
 * mem, then advances by stride bytes and repeats, h times in total.
 *
 * The loop is a do/while that tests --h, so the instruction is issued at
 * least once. NOTE(review): callers presumably always pass h >= 1; with
 * h <= 0 the loop would not stop after h iterations -- confirm.
 */
#define PREFETCH(name, op) \
static void name(void *mem, int stride, int h) \
{ \
const uint8_t *p = mem; \
do { \
__asm__ volatile (#op" %0" :: "m"(*p)); \
p += stride; \
} while (--h); \
}
/* One instance per instruction: prefetcht0 and prefetch. */
PREFETCH(prefetch_mmxext, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#include "h264_qpel.c" #include "h264_qpel.c"
...@@ -2239,11 +2152,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) ...@@ -2239,11 +2152,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
#if HAVE_YASM #if HAVE_YASM
#if ARCH_X86_32
if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_mmx;
#endif
if (!high_bit_depth && CONFIG_H264CHROMA) { if (!high_bit_depth && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx; c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
...@@ -2261,8 +2169,6 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, ...@@ -2261,8 +2169,6 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
const int high_bit_depth = bit_depth > 8; const int high_bit_depth = bit_depth > 8;
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
c->prefetch = prefetch_mmxext;
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, );
...@@ -2371,8 +2277,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, ...@@ -2371,8 +2277,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
#if HAVE_INLINE_ASM #if HAVE_INLINE_ASM
c->prefetch = prefetch_3dnow;
if (!high_bit_depth) { if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
...@@ -2452,9 +2356,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) ...@@ -2452,9 +2356,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
c->scalarproduct_float = ff_scalarproduct_float_sse; c->scalarproduct_float = ff_scalarproduct_float_sse;
c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
} }
......
This diff is collapsed.
/*
* Copyright (C) 2012 Ronald S. Bultje
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavcodec/videodsp.h"
#if HAVE_YASM
/*
 * Signature shared by the edge-emulation core routines. emulated_edge_mc()
 * calls one of them through a pointer of this type, handing over the
 * destination/source pointers, the line size and the row/column bounds it
 * has computed; all integer arguments are passed as x86_reg.
 */
typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,
x86_reg linesize, x86_reg start_y,
x86_reg end_y, x86_reg block_h,
x86_reg start_x, x86_reg end_x,
x86_reg block_w);
/* Only declared here; the definitions are not part of this file
 * (presumably they come from the yasm-built x86/videodsp.o -- confirm). */
extern emu_edge_core_func ff_emu_edge_core_mmx;
extern emu_edge_core_func ff_emu_edge_core_sse;
/*
 * Common front end of the MMX/SSE edge-emulation wrappers.
 *
 * (src_x, src_y) is the position of the block_w x block_h source block
 * relative to a w x h picture, and src points at that position. The position
 * is first clamped so that the block overlaps the picture by at least one row
 * and one column, then the overlapping row/column range is computed and the
 * actual work is delegated to core_fn.
 *
 * NOTE(review): what core_fn does with the bounds is not visible here; judging
 * by the original comment it copies the overlapping part and fills the rest.
 */
static av_always_inline void emulated_edge_mc(uint8_t *buf, const uint8_t *src,
                                              ptrdiff_t linesize,
                                              int block_w, int block_h,
                                              int src_x, int src_y,
                                              int w, int h,
                                              emu_edge_core_func *core_fn)
{
    int start_x, end_x, start_y, end_y;
    int row_adjust = 0; /* rows src has to move because of the vertical clamp */

    /* Horizontal clamp: src is moved along with src_x right away. */
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* Vertical clamp: only remember the shift, it is applied to src below
     * once it can be scaled by linesize together with start_y. */
    if (src_y >= h) {
        row_adjust = h - 1 - src_y;
        src_y      = h - 1;
    } else if (src_y <= -block_h) {
        row_adjust = 1 - block_h - src_y;
        src_y      = 1 - block_h;
    }

    /* Part of the block that lies inside the picture, in block coordinates. */
    start_y = FFMAX(0, -src_y);
    start_x = FFMAX(0, -src_x);
    end_y   = FFMIN(block_h, h-src_y);
    end_x   = FFMIN(block_w, w-src_x);
    assert(start_x < end_x && block_w > 0);
    assert(start_y < end_y && block_h > 0);

    /* Point src at the first in-picture sample and buf at the matching
     * column, then let the core routine do the copying/filling. */
    src += (row_adjust + start_y) * linesize + start_x;
    buf += start_x;
    core_fn(buf, src, linesize, start_y, end_y,
            block_h, start_x, end_x, block_w);
}
#if ARCH_X86_32
/*
 * Edge emulation bound to the MMX core routine; only built for 32-bit x86.
 * All arguments are forwarded unchanged to emulated_edge_mc().
 */
static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src,
                                             ptrdiff_t linesize,
                                             int block_w, int block_h,
                                             int src_x, int src_y, int w, int h)
{
    emu_edge_core_func *const core = ff_emu_edge_core_mmx;

    emulated_edge_mc(buf, src, linesize, block_w, block_h,
                     src_x, src_y, w, h, core);
}
#endif
static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
ptrdiff_t linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h)
{
emulated_edge_mc(buf, src, linesize, block_w, block_h, src_x, src_y,
w, h, &ff_emu_edge_core_sse);
}
#endif /* HAVE_YASM */
/*
 * Prefetch routines installed into VideoDSPContext.prefetch by
 * ff_videodsp_init_x86(). Only prototyped here; the definitions are not part
 * of this file (presumably in the yasm-built x86/videodsp.o -- confirm).
 */
void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h);
/*
 * Install the x86 implementations into ctx according to the CPU flags
 * reported by av_get_cpu_flags().
 *
 * ctx: context whose emulated_edge_mc / prefetch pointers may be replaced.
 * bpc: the edge-emulation routines are only installed when this is <= 8;
 *      the prefetch routines do not depend on it.
 *
 * Assignments are ordered so that a later match overrides an earlier one:
 * MMXEXT prefetch replaces the 3DNow! one, SSE edge emulation replaces the
 * MMX one. Without HAVE_YASM the function leaves ctx untouched.
 */
void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();
    const int is_8bit   = bpc <= 8;

#if ARCH_X86_32
    /* MMX edge emulation and 3DNow! prefetch exist for 32-bit builds only. */
    if (is_8bit && (cpu_flags & AV_CPU_FLAG_MMX)) {
        ctx->emulated_edge_mc = emulated_edge_mc_mmx;
    }
    if (cpu_flags & AV_CPU_FLAG_3DNOW) {
        ctx->prefetch = ff_prefetch_3dnow;
    }
#endif /* ARCH_X86_32 */

    if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
        ctx->prefetch = ff_prefetch_mmxext;
    }
    if (is_8bit && (cpu_flags & AV_CPU_FLAG_SSE)) {
        ctx->emulated_edge_mc = emulated_edge_mc_sse;
    }
#endif /* HAVE_YASM */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment