Commit 466e32bf authored by James Almer

x86/vf_fspp: port inline asm to yasm

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 9224c7f0
......@@ -151,11 +151,11 @@ static void store_slice2_c(uint8_t *dst, int16_t *src,
}
}
static void mul_thrmat_c(FSPPContext *p, int q)
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
int a;
for (a = 0; a < 64; a++)
((int16_t *)p->threshold_mtx)[a] = q * ((int16_t *)p->threshold_mtx_noq)[a];//ints faster in C
thr_adr[a] = q * thr_adr_noq[a];
}
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
......@@ -220,7 +220,7 @@ static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
t = qp_store[qy + (t >> qpsh)];
t = norm_qscale(t, p->qscale_type);
if (t != p->prev_q) p->prev_q = t, p->mul_thrmat(p, t);
if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
}
p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
......@@ -378,7 +378,7 @@ static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int
}
}
static void row_idct_c(int16_t *workspace, int16_t *output_adr, int output_stride, int cnt)
static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
{
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_simd16_t tmp10, tmp11, tmp12, tmp13;
......@@ -440,7 +440,7 @@ static void row_idct_c(int16_t *workspace, int16_t *output_adr, int output_strid
}
}
static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt)
static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
{
int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int_simd16_t tmp10, tmp11, tmp12, tmp13;
......@@ -582,7 +582,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
}
if (fspp->qp)
fspp->prev_q = fspp->qp, fspp->mul_thrmat(fspp, fspp->qp);
fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
/* if we are not in a constant user quantizer mode and we don't want to use
* the quantizers from the B-frames (B-frames often have a higher QP), we
......
......@@ -79,16 +79,16 @@ typedef struct FSPPContext {
ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
void (*mul_thrmat)(struct FSPPContext *fspp, int q);
void (*mul_thrmat)(int16_t *thr_adr_noq, int16_t *thr_adr, int q);
void (*column_fidct)(int16_t *thr_adr, int16_t *data,
int16_t *output, int cnt);
void (*row_idct)(int16_t *workspace, int16_t *output_adr,
int output_stride, int cnt);
ptrdiff_t output_stride, int cnt);
void (*row_fdct)(int16_t *data, const uint8_t *pixels,
int line_size, int cnt);
ptrdiff_t line_size, int cnt);
} FSPPContext;
......
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
......@@ -10,6 +10,7 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
* Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavutil/attributes.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/vf_fspp.h"
/*
 * Prototypes for the MMX routines that ff_fspp_init_x86() installs into the
 * FSPPContext function pointers. Nothing in this file defines them.
 * NOTE(review): presumably they are provided by the yasm-assembled
 * x86/vf_fspp.o listed under YASM-OBJS -- confirm against the build files.
 */
void ff_store_slice_mmx(uint8_t *dst, int16_t *src,
ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
void ff_store_slice2_mmx(uint8_t *dst, int16_t *src,
ptrdiff_t dst_stride, ptrdiff_t src_stride,
ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale);
void ff_mul_thrmat_mmx(int16_t *thr_adr_noq, int16_t *thr_adr, int q);
void ff_column_fidct_mmx(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt);
void ff_row_idct_mmx(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt);
void ff_row_fdct_mmx(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt);
/**
 * Install the x86 MMX implementations of the FSPP routines into a context.
 *
 * The six function pointers in @p s are overwritten only when EXTERNAL_MMX()
 * accepts the flags returned by av_get_cpu_flags(); otherwise @p s is left
 * exactly as the caller passed it.
 *
 * @param s filter context whose function pointers are updated
 */
av_cold void ff_fspp_init_x86(FSPPContext *s)
{
    const int flags = av_get_cpu_flags();

    /* No usable external MMX code: keep whatever is already installed. */
    if (!EXTERNAL_MMX(flags))
        return;

    s->store_slice  = ff_store_slice_mmx;
    s->store_slice2 = ff_store_slice2_mmx;
    s->mul_thrmat   = ff_mul_thrmat_mmx;
    s->column_fidct = ff_column_fidct_mmx;
    s->row_idct     = ff_row_idct_mmx;
    s->row_fdct     = ff_row_fdct_mmx;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment