Commit 71617884 authored by Janne Grunau's avatar Janne Grunau

aarch64: h264 chroma motion compensation NEON optimizations

RV40 and VC-1 use almost the same algorithm, so optimizations for
those two decoders are easy to do and are included as well.
parent b7b17ed6
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
/*
* ARM NEON optimised H.264 chroma functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/h264chroma.h"
#include "config.h"
/*
 * Prototypes of the NEON H.264 chroma routines installed by
 * ff_h264chroma_init_aarch64() below. They are only declared here; the
 * definitions live outside this file (the build adds
 * aarch64/h264cmc_neon.o under the same CONFIG_H264CHROMA option).
 *
 * There is one "put" and one "avg" routine for each of mc8, mc4 and mc2,
 * all sharing the same (dst, src, stride, h, x, y) signature.
 *
 * NOTE(review): presumably "put" stores the prediction while "avg"
 * averages it with dst, the mcN suffix is the block width in pixels, and
 * x/y are the fractional motion-vector offsets -- confirm against the
 * assembly implementation.
 */
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
int h, int x, int y);
/**
 * Hook the AArch64 NEON chroma routines into an H264ChromaContext.
 *
 * The put/avg tables of @p c are overwritten only when the runtime CPU
 * flags report NEON support and @p bit_depth does not exceed 8. In every
 * other case @p c is left exactly as the caller set it up.
 *
 * @param c         context whose put/avg chroma tables are updated
 * @param bit_depth sample bit depth; anything above 8 skips the NEON setup
 */
av_cold void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    /* The NEON versions are wired up for 8-bit content only. */
    if (bit_depth > 8 || !have_neon(cpu_flags))
        return;

    /* Slot 0: mc8 variants. */
    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;

    /* Slot 1: mc4 variants. */
    c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
    c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;

    /* Slot 2: mc2 variants. */
    c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
    c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
}
This diff is collapsed.
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/rv34dsp.h"
#include "config.h"
/*
 * Prototypes of the NEON RV40 chroma routines installed by
 * ff_rv40dsp_init_aarch64() below. They are declared here only; the
 * definitions are not part of this file.
 *
 * Only mc8 and mc4 variants are declared, each in a "put" and an "avg"
 * flavour with the same (dst, src, stride, h, x, y) signature.
 *
 * NOTE(review): presumably implemented alongside the H.264 chroma
 * assembly, since the commit message says RV40 uses almost the same
 * algorithm -- confirm where the symbols are defined.
 */
void ff_put_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_put_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_avg_rv40_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_avg_rv40_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
/**
 * Hook the AArch64 NEON chroma routines into an RV34DSPContext for RV40.
 *
 * Entries 0 and 1 of the put/avg chroma tables in @p c are replaced when
 * the runtime CPU flags report NEON support; otherwise @p c is untouched.
 *
 * @param c DSP context whose chroma tables are updated
 */
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (!have_neon(cpu_flags))
        return;

    /* Slot 0: mc8 variants. */
    c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_neon;
    c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_neon;

    /* Slot 1: mc4 variants. */
    c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_neon;
    c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_neon;
}
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/vc1dsp.h"
#include "config.h"
/*
 * Prototypes of the NEON VC-1 chroma routines installed by
 * ff_vc1dsp_init_aarch64() below. They are declared here only; the
 * definitions are not part of this file.
 *
 * mc8 and mc4 variants are declared, each in a "put" and an "avg" flavour
 * with the same (dst, src, stride, h, x, y) signature.
 *
 * NOTE(review): these are stored in the "no_rnd" tables of VC1DSPContext,
 * so they presumably implement the no-rounding variant of the filter --
 * confirm against the assembly implementation.
 */
void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_avg_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
int x, int y);
/**
 * Hook the AArch64 NEON chroma routines into a VC1DSPContext.
 *
 * Entries 0 and 1 of the put/avg "no_rnd" chroma tables in @p dsp are
 * replaced when the runtime CPU flags report NEON support; otherwise
 * @p dsp is untouched.
 *
 * @param dsp DSP context whose no_rnd chroma tables are updated
 */
av_cold void ff_vc1dsp_init_aarch64(VC1DSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    if (!have_neon(cpu_flags))
        return;

    /* "put" table: slot 0 gets mc8, slot 1 gets mc4. */
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_vc1_chroma_mc4_neon;

    /* "avg" table: same slot layout. */
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_neon;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_vc1_chroma_mc4_neon;
}
...@@ -44,6 +44,8 @@ av_cold void ff_h264chroma_init(H264ChromaContext *c, int bit_depth) ...@@ -44,6 +44,8 @@ av_cold void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
SET_CHROMA(8); SET_CHROMA(8);
} }
if (ARCH_AARCH64)
ff_h264chroma_init_aarch64(c, bit_depth);
if (ARCH_ARM) if (ARCH_ARM)
ff_h264chroma_init_arm(c, bit_depth); ff_h264chroma_init_arm(c, bit_depth);
if (ARCH_PPC) if (ARCH_PPC)
......
...@@ -30,6 +30,7 @@ typedef struct H264ChromaContext { ...@@ -30,6 +30,7 @@ typedef struct H264ChromaContext {
void ff_h264chroma_init(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth); void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
......
...@@ -81,6 +81,7 @@ void ff_rv40dsp_init(RV34DSPContext *c); ...@@ -81,6 +81,7 @@ void ff_rv40dsp_init(RV34DSPContext *c);
void ff_rv34dsp_init_arm(RV34DSPContext *c); void ff_rv34dsp_init_arm(RV34DSPContext *c);
void ff_rv34dsp_init_x86(RV34DSPContext *c); void ff_rv34dsp_init_x86(RV34DSPContext *c);
void ff_rv40dsp_init_aarch64(RV34DSPContext *c);
void ff_rv40dsp_init_x86(RV34DSPContext *c); void ff_rv40dsp_init_x86(RV34DSPContext *c);
void ff_rv40dsp_init_arm(RV34DSPContext *c); void ff_rv40dsp_init_arm(RV34DSPContext *c);
......
...@@ -618,6 +618,8 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c) ...@@ -618,6 +618,8 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c)
c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength; c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength; c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
if (ARCH_AARCH64)
ff_rv40dsp_init_aarch64(c);
if (ARCH_ARM) if (ARCH_ARM)
ff_rv40dsp_init_arm(c); ff_rv40dsp_init_arm(c);
if (ARCH_X86) if (ARCH_X86)
......
...@@ -934,6 +934,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) ...@@ -934,6 +934,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c; dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
if (ARCH_AARCH64)
ff_vc1dsp_init_aarch64(dsp);
if (ARCH_ARM) if (ARCH_ARM)
ff_vc1dsp_init_arm(dsp); ff_vc1dsp_init_arm(dsp);
if (ARCH_PPC) if (ARCH_PPC)
......
...@@ -75,6 +75,7 @@ typedef struct VC1DSPContext { ...@@ -75,6 +75,7 @@ typedef struct VC1DSPContext {
} VC1DSPContext; } VC1DSPContext;
void ff_vc1dsp_init(VC1DSPContext* c); void ff_vc1dsp_init(VC1DSPContext* c);
void ff_vc1dsp_init_aarch64(VC1DSPContext* dsp);
void ff_vc1dsp_init_arm(VC1DSPContext* dsp); void ff_vc1dsp_init_arm(VC1DSPContext* dsp);
void ff_vc1dsp_init_ppc(VC1DSPContext *c); void ff_vc1dsp_init_ppc(VC1DSPContext *c);
void ff_vc1dsp_init_x86(VC1DSPContext* dsp); void ff_vc1dsp_init_x86(VC1DSPContext* dsp);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment