Commit 12802ec0 authored by Ronald S. Bultje

dsputil: move VC1-specific stuff into VC1DSPContext.

parent 0b16cdc3
......@@ -1600,54 +1600,6 @@ H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
/**
 * Bilinear chroma interpolation of an 8-pixel-wide block, "no rounding"
 * flavour: the bias added before the final >>6 is 32 - 4 instead of 32.
 *
 * @param dst    destination; 8 bytes are written per row
 * @param src    source; 9 bytes are read from the current row and from the
 *               row one stride below it
 * @param stride byte distance between consecutive rows (shared by dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal weight, asserted to lie in 0..7
 * @param y      vertical weight, asserted to lie in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* Weights of the four neighbouring samples; they always sum to 64. */
    const int w_top_left     = (8 - x) * (8 - y);
    const int w_top_right    = x * (8 - y);
    const int w_bottom_left  = (8 - x) * y;
    const int w_bottom_right = x * y;
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left     * src[col]
                      + w_top_right    * src[col + 1]
                      + w_bottom_left  * below[col]
                      + w_bottom_right * below[col + 1]
                      + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
/**
 * Same bilinear "no rounding" chroma interpolation as the put variant
 * (bias 32 - 4 before >>6), but each result is merged into the existing
 * destination byte with avg2() instead of overwriting it.
 *
 * @param dst    destination; 8 bytes per row are read and rewritten
 * @param src    source; 9 bytes are read from the current row and from the
 *               row one stride below it
 * @param stride byte distance between consecutive rows (shared by dst and src)
 * @param h      number of rows to process
 * @param x      horizontal weight, asserted to lie in 0..7
 * @param y      vertical weight, asserted to lie in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* Weights of the four neighbouring samples; they always sum to 64. */
    const int w_top_left     = (8 - x) * (8 - y);
    const int w_top_right    = x * (8 - y);
    const int w_bottom_left  = (8 - x) * y;
    const int w_bottom_right = x * y;
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = avg2(dst[col], ((w_top_left     * src[col]
                                      + w_top_right    * src[col + 1]
                                      + w_bottom_left  * src[stride + col]
                                      + w_bottom_right * src[stride + col + 1]
                                      + 32 - 4) >> 6));
        }
        dst += stride;
        src += stride;
    }
}
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
......@@ -4301,17 +4253,12 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
c->draw_edges = draw_edges_c;
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
ff_mlp_init(c, avctx);
#endif
#if CONFIG_VC1_DECODER
ff_vc1dsp_init(c,avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
ff_intrax8dsp_init(c,avctx);
#endif
......
......@@ -341,9 +341,6 @@ typedef struct DSPContext {
*/
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
/* This is really one func used in VC-1 decoding */
h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
......@@ -503,29 +500,6 @@ typedef struct DSPContext {
unsigned int filter_shift, int32_t mask, int blocksize,
int32_t *sample_buffer);
/* vc1 functions */
void (*vc1_inv_trans_8x8)(DCTELEM *b);
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_v_overlap)(uint8_t* src, int stride);
void (*vc1_h_overlap)(uint8_t* src, int stride);
void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
/* put 8x8 block with bicubic interpolation and quarterpel precision
* last argument is actually round value instead of height
*/
op_pixels_func put_vc1_mspel_pixels_tab[16];
op_pixels_func avg_vc1_mspel_pixels_tab[16];
/* intrax8 functions */
void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
......@@ -629,7 +603,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_dwt(DSPContext *c);
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
......
......@@ -43,7 +43,6 @@ void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block);
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
......
......@@ -171,8 +171,6 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
dsputil_init_altivec(c, avctx);
if(CONFIG_VC1_DECODER)
vc1dsp_init_altivec(c, avctx);
float_init_altivec(c, avctx);
int_init_altivec(c, avctx);
c->gmc1 = gmc1_altivec;
......
......@@ -322,7 +322,11 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
}
void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
/**
 * Install the AltiVec inverse-transform routines into a VC-1 DSP table.
 *
 * The table is left untouched unless the runtime CPU flags report AltiVec.
 * Only the 8x8 and 8x4 inverse transforms are replaced.
 *
 * @param dsp table to update
 */
void ff_vc1dsp_init_altivec(VC1DSPContext* dsp)
{
    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
        dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
        dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
    }
}
......@@ -337,14 +337,14 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
v->res_fasttx = get_bits1(gb);
if (!v->res_fasttx)
{
v->s.dsp.vc1_inv_trans_8x8 = ff_simple_idct;
v->s.dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->s.dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->s.dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
v->s.dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
v->s.dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
v->s.dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
v->s.dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
}
v->fastuvmc = get_bits1(gb); //common
......
......@@ -26,6 +26,7 @@
#include "avcodec.h"
#include "mpegvideo.h"
#include "intrax8.h"
#include "vc1dsp.h"
/** Markers used in VC-1 AP frame data */
//@{
......@@ -155,6 +156,7 @@ enum COTypes {
typedef struct VC1Context{
MpegEncContext s;
IntraX8Context x8;
VC1DSPContext vc1dsp;
int bits;
......
This diff is collapsed.
......@@ -25,7 +25,7 @@
*
*/
#include "dsputil.h"
#include "vc1dsp.h"
/** Apply overlap transform to horizontal edge
......@@ -612,7 +612,56 @@ PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
/**
 * Bilinear chroma interpolation of an 8-pixel-wide block without the usual
 * rounding: the bias added before the final >>6 is 32 - 4 rather than 32.
 *
 * @param dst    destination; 8 bytes are written per row
 * @param src    source; 9 bytes are read from the current row and from the
 *               row one stride below it
 * @param stride byte distance between consecutive rows (shared by dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal weight, asserted to lie in 0..7
 * @param y      vertical weight, asserted to lie in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int inv_x = 8 - x;
    const int inv_y = 8 - y;
    /* Per-sample weights: upper-left, upper-right, lower-left, lower-right. */
    const int wa = inv_x * inv_y;
    const int wb = x     * inv_y;
    const int wc = inv_x * y;
    const int wd = x     * y;
    int rows_left;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (rows_left = h; rows_left > 0; rows_left--) {
        int k;

        for (k = 0; k < 8; k++) {
            dst[k] = (wa * src[k] + wb * src[k + 1]
                    + wc * src[stride + k] + wd * src[stride + k + 1]
                    + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
/* Average of two values, rounded up. Both arguments are parenthesized so
 * that an argument containing a lower-precedence operator (e.g. `p | q`)
 * is still averaged as a whole. */
#define avg2(a,b) (((a)+(b)+1)>>1)

/**
 * Bilinear "no rounding" chroma interpolation of an 8-pixel-wide block
 * (bias 32 - 4 before >>6), averaged into the existing destination with
 * avg2() instead of overwriting it.
 *
 * @param dst    destination; 8 bytes per row are read and rewritten
 * @param src    source; 9 bytes are read from the current row and from the
 *               row one stride below it
 * @param stride byte distance between consecutive rows (shared by dst and src)
 * @param h      number of rows to process
 * @param x      horizontal weight, asserted to lie in 0..7
 * @param y      vertical weight, asserted to lie in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* Weights of the four neighbouring samples; they always sum to 64. */
    const int A = (8 - x) * (8 - y);
    const int B = x       * (8 - y);
    const int C = (8 - x) * y;
    const int D = x       * y;
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            const int pred = (A * src[j]          + B * src[j + 1]
                            + C * src[stride + j] + D * src[stride + j + 1]
                            + 32 - 4) >> 6;

            dst[j] = avg2(dst[j], pred);
        }
        dst += stride;
        src += stride;
    }
}
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
......@@ -663,4 +712,12 @@ av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
if (HAVE_ALTIVEC)
ff_vc1dsp_init_altivec(dsp);
if (HAVE_MMX)
ff_vc1dsp_init_mmx(dsp);
}
/*
* VC-1 and WMV3 decoder - DSP functions
* Copyright (c) 2006 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* VC-1 and WMV3 decoder - DSP functions
*
*/
#ifndef AVCODEC_VC1DSP_H
#define AVCODEC_VC1DSP_H
#include "dsputil.h"
/**
 * Table of function pointers for the VC-1 specific DSP routines.
 *
 * ff_vc1dsp_init() fills the table with the C implementations and then
 * gives the AltiVec and MMX initializers a chance to override entries.
 */
typedef struct VC1DSPContext {
/* vc1 functions */
/* Inverse-transform entry points, one per block size. The 8x8 variant takes
 * only the coefficient block; the others also take a destination and its
 * line size. The decoder's sequence-header parser replaces all eight entries
 * with ff_simple_idct* routines when res_fasttx is 0.
 * NOTE(review): the _dc variants are presumably for DC-only blocks - confirm. */
void (*vc1_inv_trans_8x8)(DCTELEM *b);
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
/* Overlap filters for vertical (v) and horizontal (h) edges. */
void (*vc1_v_overlap)(uint8_t* src, int stride);
void (*vc1_h_overlap)(uint8_t* src, int stride);
/* Loop filters; the numeric suffix is the edge length handled per call.
 * NOTE(review): pq looks like the picture quantizer - confirm against callers. */
void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
/* put 8x8 block with bicubic interpolation and quarterpel precision
 * last argument is actually round value instead of height
 */
op_pixels_func put_vc1_mspel_pixels_tab[16];
op_pixels_func avg_vc1_mspel_pixels_tab[16];
/* This is really one func used in VC-1 decoding: the initializers shown
 * in this change only ever assign index 0 of each table. */
h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
} VC1DSPContext;
/* Fill c with the C implementations, then call the AltiVec and MMX
 * initializers when HAVE_ALTIVEC / HAVE_MMX are set. */
void ff_vc1dsp_init(VC1DSPContext* c);
/* Override entries with AltiVec routines; does nothing unless the runtime
 * CPU flags report AltiVec. */
void ff_vc1dsp_init_altivec(VC1DSPContext* c);
/* Override entries with x86 SIMD routines selected from av_get_cpu_flags(). */
void ff_vc1dsp_init_mmx(VC1DSPContext* dsp);
#endif /* AVCODEC_VC1DSP_H */
......@@ -1894,20 +1894,14 @@ PREFETCH(prefetch_3dnow, prefetch)
void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
......@@ -1931,15 +1925,11 @@ void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
......@@ -2535,7 +2525,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
......@@ -2622,8 +2611,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
......@@ -2636,9 +2623,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
#endif
if (CONFIG_VC1_DECODER)
ff_vc1dsp_init_mmx(c, avctx);
c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
......@@ -2695,8 +2679,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
#endif
......@@ -2745,8 +2727,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, ssse3);
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
......
......@@ -196,7 +196,6 @@ void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
......
......@@ -28,6 +28,7 @@
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
#include "libavcodec/vc1dsp.h"
#define OP_PUT(S,D)
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
......@@ -712,30 +713,45 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
ff_vc1_h_loop_filter8_sse4(src, stride, pq);
ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
}
#endif
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
int stride, int h, int x, int y);
void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
{
int mm_flags = av_get_cpu_flags();
dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
if (mm_flags & AV_CPU_FLAG_MMX) {
dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
}
if (mm_flags & AV_CPU_FLAG_MMX2){
dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
......@@ -775,11 +791,16 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
ASSIGN_LF(mmx);
dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
}
return;
if (mm_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_LF(mmx2);
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
}
if (mm_flags & AV_CPU_FLAG_SSE2) {
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
......@@ -788,6 +809,8 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
}
if (mm_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_LF(ssse3);
dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
}
if (mm_flags & AV_CPU_FLAG_SSE4) {
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment