Commit d1293512 authored by Ronald S. Bultje, committed by Michael Niedermayer

vp3: use hpeldsp instead of dsputil for half-pel functions.

This makes vp3 independent of dsputil.
parent 9628e5a4
...@@ -1823,7 +1823,7 @@ vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel" ...@@ -1823,7 +1823,7 @@ vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel"
vc1image_decoder_select="vc1_decoder" vc1image_decoder_select="vc1_decoder"
vorbis_decoder_select="mdct" vorbis_decoder_select="mdct"
vorbis_encoder_select="mdct" vorbis_encoder_select="mdct"
vp3_decoder_select="dsputil vp3dsp videodsp" vp3_decoder_select="hpeldsp vp3dsp videodsp"
vp5_decoder_select="dsputil h264chroma videodsp vp3dsp" vp5_decoder_select="dsputil h264chroma videodsp vp3dsp"
vp6_decoder_select="dsputil h264chroma huffman videodsp vp3dsp" vp6_decoder_select="dsputil h264chroma huffman videodsp vp3dsp"
vp6a_decoder_select="vp6_decoder" vp6a_decoder_select="vp6_decoder"
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "internal.h" #include "internal.h"
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
#include "hpeldsp.h"
#include "videodsp.h" #include "videodsp.h"
#include "vp3data.h" #include "vp3data.h"
#include "vp3dsp.h" #include "vp3dsp.h"
...@@ -141,7 +142,7 @@ typedef struct Vp3DecodeContext { ...@@ -141,7 +142,7 @@ typedef struct Vp3DecodeContext {
int keyframe; int keyframe;
uint8_t idct_permutation[64]; uint8_t idct_permutation[64];
uint8_t idct_scantable[64]; uint8_t idct_scantable[64];
DSPContext dsp; HpelDSPContext hdsp;
VideoDSPContext vdsp; VideoDSPContext vdsp;
VP3DSPContext vp3dsp; VP3DSPContext vp3dsp;
DECLARE_ALIGNED(16, int16_t, block)[64]; DECLARE_ALIGNED(16, int16_t, block)[64];
...@@ -1561,7 +1562,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) ...@@ -1561,7 +1562,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
VP3 source but this would be slower as VP3 source but this would be slower as
put_no_rnd_pixels_tab is better optimized */ put_no_rnd_pixels_tab is better optimized */
if(motion_halfpel_index != 3){ if(motion_halfpel_index != 3){
s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( s->hdsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
output_plane + first_pixel, output_plane + first_pixel,
motion_source, stride, 8); motion_source, stride, 8);
}else{ }else{
...@@ -1595,7 +1596,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) ...@@ -1595,7 +1596,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
} else { } else {
/* copy directly from the previous frame */ /* copy directly from the previous frame */
s->dsp.put_pixels_tab[1][0]( s->hdsp.put_pixels_tab[1][0](
output_plane + first_pixel, output_plane + first_pixel,
last_plane + first_pixel, last_plane + first_pixel,
stride, 8); stride, 8);
...@@ -1694,7 +1695,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) ...@@ -1694,7 +1695,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
if (avctx->codec_id != AV_CODEC_ID_THEORA) if (avctx->codec_id != AV_CODEC_ID_THEORA)
avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->pix_fmt = AV_PIX_FMT_YUV420P;
avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
ff_dsputil_init(&s->dsp, avctx); ff_hpeldsp_init(&s->hdsp, avctx->flags | CODEC_FLAG_BITEXACT);
ff_videodsp_init(&s->vdsp, 8); ff_videodsp_init(&s->vdsp, 8);
ff_vp3dsp_init(&s->vp3dsp, avctx->flags); ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
......
...@@ -52,10 +52,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; ...@@ -52,10 +52,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_7) = 0x0707070707070707ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F) = 0x3F3F3F3F3F3F3F3FULL; DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F) = 0x3F3F3F3F3F3F3F3FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_81) = 0x8181818181818181ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
......
...@@ -49,10 +49,7 @@ extern const uint64_t ff_pw_255; ...@@ -49,10 +49,7 @@ extern const uint64_t ff_pw_255;
extern const xmm_reg ff_pb_1; extern const xmm_reg ff_pb_1;
extern const xmm_reg ff_pb_3; extern const xmm_reg ff_pb_3;
extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F;
extern const uint64_t ff_pb_3F; extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_81;
extern const xmm_reg ff_pb_F8; extern const xmm_reg ff_pb_F8;
extern const uint64_t ff_pb_FC; extern const uint64_t ff_pb_FC;
......
...@@ -33,12 +33,13 @@ vp3_idct_data: times 8 dw 64277 ...@@ -33,12 +33,13 @@ vp3_idct_data: times 8 dw 64277
times 8 dw 25080 times 8 dw 25080
times 8 dw 12785 times 8 dw 12785
pb_7: times 8 db 7
pb_1F: times 8 db 0x1f
pb_81: times 8 db 0x81
cextern pb_1 cextern pb_1
cextern pb_3 cextern pb_3
cextern pb_7
cextern pb_1F
cextern pb_80 cextern pb_80
cextern pb_81
cextern pw_8 cextern pw_8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment