Commit b8664c92, authored by Peter Ross, committed by Michael Niedermayer

avcodec/vp8dsp: add VP7 idct and loop filter

Signed-off-by: Peter Ross <pross@xvid.org>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent a91d9e4b
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "libavcodec/vp8dsp.h" #include "libavcodec/vp8dsp.h"
void ff_vp8dsp_init_armv6(VP8DSPContext *dsp); void ff_vp8dsp_init_armv6(VP8DSPContext *dsp, int vp7);
void ff_vp8dsp_init_neon(VP8DSPContext *dsp); void ff_vp8dsp_init_neon(VP8DSPContext *dsp, int vp7);
#define VP8_LF_Y(hv, inner, opt) \ #define VP8_LF_Y(hv, inner, opt) \
void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst, \ void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst, \
......
...@@ -23,12 +23,12 @@ ...@@ -23,12 +23,12 @@
#include "libavcodec/vp8dsp.h" #include "libavcodec/vp8dsp.h"
#include "vp8dsp.h" #include "vp8dsp.h"
av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp) av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp, int vp7)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (have_armv6(cpu_flags)) if (have_armv6(cpu_flags))
ff_vp8dsp_init_armv6(dsp); ff_vp8dsp_init_armv6(dsp, vp7);
if (have_neon(cpu_flags)) if (have_neon(cpu_flags))
ff_vp8dsp_init_neon(dsp); ff_vp8dsp_init_neon(dsp, vp7);
} }
...@@ -40,8 +40,9 @@ VP8_BILIN(16, armv6); ...@@ -40,8 +40,9 @@ VP8_BILIN(16, armv6);
VP8_BILIN(8, armv6); VP8_BILIN(8, armv6);
VP8_BILIN(4, armv6); VP8_BILIN(4, armv6);
av_cold void ff_vp8dsp_init_armv6(VP8DSPContext *dsp) av_cold void ff_vp8dsp_init_armv6(VP8DSPContext *dsp, int vp7)
{ {
if (!vp7) {
dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_armv6; dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_armv6;
dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_armv6; dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_armv6;
...@@ -62,6 +63,7 @@ av_cold void ff_vp8dsp_init_armv6(VP8DSPContext *dsp) ...@@ -62,6 +63,7 @@ av_cold void ff_vp8dsp_init_armv6(VP8DSPContext *dsp)
dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_armv6; dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_armv6;
dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_armv6; dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_armv6;
}
dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_armv6; dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_armv6;
dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_armv6; dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_armv6;
......
...@@ -39,8 +39,9 @@ VP8_BILIN(16, neon); ...@@ -39,8 +39,9 @@ VP8_BILIN(16, neon);
VP8_BILIN(8, neon); VP8_BILIN(8, neon);
VP8_BILIN(4, neon); VP8_BILIN(4, neon);
av_cold void ff_vp8dsp_init_neon(VP8DSPContext *dsp) av_cold void ff_vp8dsp_init_neon(VP8DSPContext *dsp, int vp7)
{ {
if (!vp7) {
dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_neon; dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_neon;
dsp->vp8_idct_add = ff_vp8_idct_add_neon; dsp->vp8_idct_add = ff_vp8_idct_add_neon;
...@@ -60,6 +61,7 @@ av_cold void ff_vp8dsp_init_neon(VP8DSPContext *dsp) ...@@ -60,6 +61,7 @@ av_cold void ff_vp8dsp_init_neon(VP8DSPContext *dsp)
dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon; dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon;
dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon; dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon;
}
dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon; dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon; dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon;
......
...@@ -2010,7 +2010,7 @@ av_cold int ff_vp8_decode_init(AVCodecContext *avctx) ...@@ -2010,7 +2010,7 @@ av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
ff_videodsp_init(&s->vdsp, 8); ff_videodsp_init(&s->vdsp, 8);
ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1); ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
ff_vp8dsp_init(&s->vp8dsp); ff_vp8dsp_init(&s->vp8dsp, 0);
if ((ret = vp8_init_frames(s)) < 0) { if ((ret = vp8_init_frames(s)) < 0) {
ff_vp8_decode_free(avctx); ff_vp8_decode_free(avctx);
......
This diff is collapsed.
...@@ -88,9 +88,9 @@ void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ...@@ -88,9 +88,9 @@ void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride, void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y); int h, int x, int y);
void ff_vp8dsp_init(VP8DSPContext *c); void ff_vp8dsp_init(VP8DSPContext *c, int vp7);
void ff_vp8dsp_init_x86(VP8DSPContext *c); void ff_vp8dsp_init_x86(VP8DSPContext *c, int vp7);
void ff_vp8dsp_init_arm(VP8DSPContext *c); void ff_vp8dsp_init_arm(VP8DSPContext *c, int vp7);
void ff_vp8dsp_init_ppc(VP8DSPContext *c); void ff_vp8dsp_init_ppc(VP8DSPContext *c);
#endif /* AVCODEC_VP8DSP_H */ #endif /* AVCODEC_VP8DSP_H */
...@@ -315,18 +315,22 @@ DECLARE_LOOP_FILTER(sse4) ...@@ -315,18 +315,22 @@ DECLARE_LOOP_FILTER(sse4)
c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c, int vp7)
{ {
#if HAVE_YASM #if HAVE_YASM
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) { if (EXTERNAL_MMX(cpu_flags)) {
if (!vp7) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx; c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
}
#if ARCH_X86_32 #if ARCH_X86_32
if (!vp7) {
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx; c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
c->vp8_idct_add = ff_vp8_idct_add_mmx; c->vp8_idct_add = ff_vp8_idct_add_mmx;
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx; c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
}
c->put_vp8_epel_pixels_tab[0][0][0] = c->put_vp8_epel_pixels_tab[0][0][0] =
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx; c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
#endif #endif
...@@ -334,6 +338,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -334,6 +338,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
#if ARCH_X86_32 #if ARCH_X86_32
if (!vp7) {
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
...@@ -346,6 +351,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -346,6 +351,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
}
#endif #endif
} }
...@@ -360,6 +366,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -360,6 +366,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
VP8_BILINEAR_MC_FUNC(0, 16, mmxext); VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
VP8_BILINEAR_MC_FUNC(1, 8, mmxext); VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
if (!vp7) {
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
...@@ -372,12 +379,15 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -372,12 +379,15 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
}
#endif #endif
} }
if (EXTERNAL_SSE(cpu_flags)) { if (EXTERNAL_SSE(cpu_flags)) {
if (!vp7) {
c->vp8_idct_add = ff_vp8_idct_add_sse; c->vp8_idct_add = ff_vp8_idct_add_sse;
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
}
c->put_vp8_epel_pixels_tab[0][0][0] = c->put_vp8_epel_pixels_tab[0][0][0] =
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
} }
...@@ -388,6 +398,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -388,6 +398,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
VP8_BILINEAR_MC_FUNC(0, 16, sse2); VP8_BILINEAR_MC_FUNC(0, 16, sse2);
VP8_BILINEAR_MC_FUNC(1, 8, sse2); VP8_BILINEAR_MC_FUNC(1, 8, sse2);
if (!vp7) {
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
...@@ -395,9 +406,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -395,9 +406,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
}
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
if (!vp7) {
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
...@@ -407,6 +420,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -407,6 +420,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
}
} }
if (EXTERNAL_SSSE3(cpu_flags)) { if (EXTERNAL_SSSE3(cpu_flags)) {
...@@ -417,6 +431,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -417,6 +431,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
VP8_BILINEAR_MC_FUNC(1, 8, ssse3); VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
VP8_BILINEAR_MC_FUNC(2, 4, ssse3); VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
if (!vp7) {
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
...@@ -429,14 +444,17 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) ...@@ -429,14 +444,17 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
}
} }
if (EXTERNAL_SSE4(cpu_flags)) { if (EXTERNAL_SSE4(cpu_flags)) {
if (!vp7) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
}
} }
#endif /* HAVE_YASM */ #endif /* HAVE_YASM */
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment