Commit ea0931fb authored by Michael Niedermayer

Merge commit '65d5d586'

* commit '65d5d586':
  dsputil: Move SVQ1 encoding specific bits into svq1enc

Conflicts:
	libavcodec/x86/Makefile
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents cb8763bd 65d5d586
...@@ -2216,16 +2216,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, ...@@ -2216,16 +2216,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
return score; return score;
} }
/**
 * Sum of squared differences between a signed 8-bit array and a signed
 * 16-bit array (plain C version).
 *
 * @param pix1 first input, size int8_t elements
 * @param pix2 second input, size int16_t elements
 * @param size number of elements to compare; nothing is read when <= 0
 * @return sum over all elements of (pix1[i] - pix2[i])^2, accumulated in int
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    const int8_t  *a = pix1;
    const int16_t *b = pix2;
    int total = 0;
    int remaining;

    for (remaining = size; remaining > 0; remaining--) {
        const int delta = *a++ - *b++;

        total += delta * delta;
    }

    return total;
}
#define WRAPPER8_16_SQ(name8, name16) \ #define WRAPPER8_16_SQ(name8, name16) \
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
int stride, int h) \ int stride, int h) \
...@@ -2626,8 +2616,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ...@@ -2626,8 +2616,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_dsputil_init_dwt(c); ff_dsputil_init_dwt(c);
#endif #endif
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
c->bswap_buf = bswap_buf; c->bswap_buf = bswap_buf;
c->bswap16_buf = bswap16_buf; c->bswap16_buf = bswap16_buf;
......
...@@ -177,9 +177,6 @@ typedef struct DSPContext { ...@@ -177,9 +177,6 @@ typedef struct DSPContext {
me_cmp_func ildct_cmp[6]; // only width 16 used me_cmp_func ildct_cmp[6]; // only width 16 used
me_cmp_func frame_skip_cmp[6]; // only width 8 used me_cmp_func frame_skip_cmp[6]; // only width 8 used
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
int size);
qpel_mc_func put_qpel_pixels_tab[2][16]; qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
......
...@@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o ...@@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o
OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
......
...@@ -34,48 +34,6 @@ ...@@ -34,48 +34,6 @@
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_altivec.h" #include "dsputil_altivec.h"
/**
 * AltiVec version of the sum of squared differences between a signed 8-bit
 * array and a signed 16-bit array.
 *
 * Full groups of 16 elements are handled with vector instructions; the
 * remaining (size % 16) elements are handled by a scalar loop at the end.
 *
 * @param pix1 first input, size int8_t elements
 * @param pix2 second input, size int16_t elements
 * @param size number of elements to compare
 * @return sum of (pix1[i] - pix2[i])^2, accumulated in 32-bit integers
 */
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
int size)
{
// size16 is the number of complete 16-element groups
int i, size16 = size >> 4;
vector signed char vpix1;
vector signed short vpix2, vdiff, vpix1l, vpix1h;
// the union lets the scalar tail loop add to the vector accumulator
union {
vector signed int vscore;
int32_t score[4];
} u = { .vscore = vec_splat_s32(0) };
// XXX lazy way, fix it later
while (size16) {
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
vpix1 = vec_unaligned_load(pix1);
vpix2 = vec_unaligned_load(pix2);
pix2 += 8;
// unpack
// widen one half of the 16 pix1 bytes to 16-bit lanes and subtract
vpix1h = vec_unpackh(vpix1);
vdiff = vec_sub(vpix1h, vpix2);
vpix1l = vec_unpackl(vpix1);
// load another batch from pix2
vpix2 = vec_unaligned_load(pix2);
// multiply the differences by themselves and add into the accumulator
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
vdiff = vec_sub(vpix1l, vpix2);
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
pix1 += 16;
pix2 += 8;
size16--;
}
// fold the partial sums; the total is then read from element 3
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
// scalar tail: pix1/pix2 already point past the vectorized part
size %= 16;
for (i = 0; i < size; i++)
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
return u.score[3];
}
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
int order) int order)
{ {
...@@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, ...@@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
{ {
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
c->scalarproduct_int16 = scalarproduct_int16_altivec; c->scalarproduct_int16 = scalarproduct_int16_altivec;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec; c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
......
/*
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/svq1enc.h"
#if HAVE_ALTIVEC
/**
 * AltiVec version of the sum of squared differences between a signed 8-bit
 * array and a signed 16-bit array.
 *
 * Full groups of 16 elements are handled with vector instructions; the
 * remaining (size % 16) elements are handled by a scalar loop at the end.
 *
 * @param pix1 first input, size int8_t elements
 * @param pix2 second input, size int16_t elements
 * @param size number of elements to compare
 * @return sum of (pix1[i] - pix2[i])^2, accumulated in 32-bit integers
 */
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
int size)
{
// size16 is the number of complete 16-element groups
int i, size16 = size >> 4;
vector signed char vpix1;
vector signed short vpix2, vdiff, vpix1l, vpix1h;
// the union lets the scalar tail loop add to the vector accumulator
union {
vector signed int vscore;
int32_t score[4];
} u = { .vscore = vec_splat_s32(0) };
while (size16) {
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
vpix1 = vec_unaligned_load(pix1);
vpix2 = vec_unaligned_load(pix2);
pix2 += 8;
// unpack
// widen one half of the 16 pix1 bytes to 16-bit lanes and subtract
vpix1h = vec_unpackh(vpix1);
vdiff = vec_sub(vpix1h, vpix2);
vpix1l = vec_unpackl(vpix1);
// load another batch from pix2
vpix2 = vec_unaligned_load(pix2);
// multiply the differences by themselves and add into the accumulator
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
vdiff = vec_sub(vpix1l, vpix2);
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
pix1 += 16;
pix2 += 8;
size16--;
}
// fold the partial sums; the total is then read from element 3
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
// scalar tail: pix1/pix2 already point past the vectorized part
size %= 16;
for (i = 0; i < size; i++)
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
return u.score[3];
}
#endif /* HAVE_ALTIVEC */
/**
 * Install PowerPC-optimized routines into an SVQ1 encoder context.
 *
 * When AltiVec support is compiled in and the CPU reports AltiVec at run
 * time, c->ssd_int8_vs_int16 is pointed at the AltiVec implementation.
 * Otherwise the context is left untouched, so whatever the caller installed
 * beforehand stays in effect.
 *
 * @param c encoder context whose function pointers may be overridden
 */
av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
{
#if HAVE_ALTIVEC
    /* Being built with AltiVec does not guarantee that the CPU we run on
     * has it, so check at run time like the x86 init function does. */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
...@@ -34,48 +34,11 @@ ...@@ -34,48 +34,11 @@
#include "internal.h" #include "internal.h"
#include "mpegutils.h" #include "mpegutils.h"
#include "svq1.h" #include "svq1.h"
#include "svq1enc.h"
#include "svq1enc_cb.h" #include "svq1enc_cb.h"
#include "libavutil/avassert.h" #include "libavutil/avassert.h"
/**
 * Private state of the SVQ1 encoder.
 */
typedef struct SVQ1EncContext {
/* FIXME: Needed for motion estimation, should not be used for anything
* else, the idea is to make the motion estimation eventually independent
* of MpegEncContext, so this will be removed then. */
MpegEncContext m;
AVCodecContext *avctx;
DSPContext dsp;
HpelDSPContext hdsp;
AVFrame *current_picture;
AVFrame *last_picture;
PutBitContext pb;
GetBitContext gb;
/* why ooh why this sick breadth first order,
* everything is slower and more complex */
PutBitContext reorder_pb[6];
int frame_width;
int frame_height;
/* Y plane block dimensions */
int y_block_width;
int y_block_height;
/* U & V plane (C planes) block dimensions */
int c_block_width;
int c_block_height;
uint16_t *mb_type;
uint32_t *dummy;
int16_t (*motion_val8[3])[2];
int16_t (*motion_val16[3])[2];
int64_t rd_total;
uint8_t *scratchbuf;
} SVQ1EncContext;
static void svq1_write_header(SVQ1EncContext *s, int frame_type) static void svq1_write_header(SVQ1EncContext *s, int frame_type)
{ {
int i; int i;
...@@ -113,6 +76,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type) ...@@ -113,6 +76,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
#define QUALITY_THRESHOLD 100 #define QUALITY_THRESHOLD 100
#define THRESHOLD_MULTIPLIER 0.6 #define THRESHOLD_MULTIPLIER 0.6
/**
 * Reference C implementation of the sum of squared differences between a
 * signed 8-bit array and a signed 16-bit array.
 *
 * @param pix1 first input, size int8_t elements
 * @param pix2 second input, size int16_t elements
 * @param size number of elements to compare; nothing is read when <= 0
 * @return sum over all elements of (pix1[i] - pix2[i])^2, accumulated in int
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int acc = 0;
    int k   = 0;

    while (k < size) {
        const int d = pix1[k] - pix2[k];

        acc += d * d;
        k++;
    }

    return acc;
}
static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
uint8_t *decoded, int stride, int level, uint8_t *decoded, int stride, int level,
int threshold, int lambda, int intra) int threshold, int lambda, int intra)
...@@ -174,7 +147,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, ...@@ -174,7 +147,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
int sqr, diff, score; int sqr, diff, score;
vector = codebook + stage * size * 16 + i * size; vector = codebook + stage * size * 16 + i * size;
sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size); sqr = s->ssd_int8_vs_int16(vector, block[stage], size);
diff = block_sum[stage] - sum; diff = block_sum[stage] - sum;
score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
if (score < best_vector_score) { if (score < best_vector_score) {
...@@ -580,6 +553,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx) ...@@ -580,6 +553,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
s->y_block_height * sizeof(int16_t)); s->y_block_height * sizeof(int16_t));
s->dummy = av_mallocz((s->y_block_width + 1) * s->dummy = av_mallocz((s->y_block_width + 1) *
s->y_block_height * sizeof(int32_t)); s->y_block_height * sizeof(int32_t));
s->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
if (ARCH_PPC)
ff_svq1enc_init_ppc(s);
if (ARCH_X86)
ff_svq1enc_init_x86(s);
ff_h263_encode_init(&s->m); // mv_penalty ff_h263_encode_init(&s->m); // mv_penalty
return 0; return 0;
......
/*
* SVQ1 encoder
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_SVQ1ENC_H
#define AVCODEC_SVQ1ENC_H
#include <stdint.h>
#include "libavutil/frame.h"
#include "avcodec.h"
#include "dsputil.h"
#include "get_bits.h"
#include "hpeldsp.h"
#include "mpegvideo.h"
#include "put_bits.h"
/**
 * Private state of the SVQ1 encoder, shared with the architecture-specific
 * init functions declared in this header.
 */
typedef struct SVQ1EncContext {
/* FIXME: Needed for motion estimation, should not be used for anything
* else, the idea is to make the motion estimation eventually independent
* of MpegEncContext, so this will be removed then. */
MpegEncContext m;
AVCodecContext *avctx;
DSPContext dsp;
HpelDSPContext hdsp;
AVFrame *current_picture;
AVFrame *last_picture;
PutBitContext pb;
GetBitContext gb;
/* why ooh why this sick breadth first order,
* everything is slower and more complex */
PutBitContext reorder_pb[6];
int frame_width;
int frame_height;
/* Y plane block dimensions */
int y_block_width;
int y_block_height;
/* U & V plane (C planes) block dimensions */
int c_block_width;
int c_block_height;
uint16_t *mb_type;
uint32_t *dummy;
int16_t (*motion_val8[3])[2];
int16_t (*motion_val16[3])[2];
int64_t rd_total;
uint8_t *scratchbuf;
/* Sum of squared differences between size int8_t values (pix1) and
* size int16_t values (pix2); this is the hook that
* ff_svq1enc_init_ppc() / ff_svq1enc_init_x86() may override. */
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
int size);
} SVQ1EncContext;
void ff_svq1enc_init_ppc(SVQ1EncContext *c);
void ff_svq1enc_init_x86(SVQ1EncContext *c);
#endif /* AVCODEC_SVQ1ENC_H */
...@@ -59,6 +59,7 @@ MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o ...@@ -59,6 +59,7 @@ MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS += x86/deinterlace.o \ YASM-OBJS += x86/deinterlace.o \
......
...@@ -703,40 +703,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ...@@ -703,40 +703,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
#undef SUM #undef SUM
/**
 * MMX version of the sum of squared differences between a signed 8-bit
 * array and a signed 16-bit array.
 *
 * The loop walks the arrays from the end towards the start, 8 elements per
 * iteration, and always executes at least once.
 * NOTE(review): size is presumably required to be a positive multiple of 8;
 * confirm against the callers.
 *
 * @param pix1 first input, int8_t elements
 * @param pix2 second input, int16_t elements
 * @param size number of elements to compare
 * @return sum of (pix1[i] - pix2[i])^2, accumulated in 32 bits
 */
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
int size)
{
int sum;
x86_reg i = size;
__asm__ volatile (
/* mm4 = running pair of 32-bit partial sums */
"pxor %%mm4, %%mm4 \n"
"1: \n"
/* step the element index down by 8; jg below tests the flags set here */
"sub $8, %0 \n"
/* mm2 = 8 bytes of pix1, mm0/mm1 = the matching 8 words of pix2 */
"movq (%2, %0), %%mm2 \n"
"movq (%3, %0, 2), %%mm0 \n"
"movq 8(%3, %0, 2), %%mm1 \n"
/* put each pix1 byte into the high byte of a word, then shift right
 * arithmetically by 8 to get sign-extended 16-bit values; the previous
 * contents of mm3 end up in the low bytes and are shifted out */
"punpckhbw %%mm2, %%mm3 \n"
"punpcklbw %%mm2, %%mm2 \n"
"psraw $8, %%mm3 \n"
"psraw $8, %%mm2 \n"
/* differences, then square and add adjacent pairs into dwords */
"psubw %%mm3, %%mm1 \n"
"psubw %%mm2, %%mm0 \n"
"pmaddwd %%mm1, %%mm1 \n"
"pmaddwd %%mm0, %%mm0 \n"
"paddd %%mm1, %%mm4 \n"
"paddd %%mm0, %%mm4 \n"
/* loop while the index left by "sub" is still positive */
"jg 1b \n"
/* add the upper dword of mm4 to the lower one and return it */
"movq %%mm4, %%mm3 \n"
"psrlq $32, %%mm3 \n"
"paddd %%mm3, %%mm4 \n"
"movd %%mm4, %1 \n"
: "+r" (i), "=r" (sum)
: "r" (pix1), "r" (pix2));
return sum;
}
#define PHADDD(a, t) \ #define PHADDD(a, t) \
"movq " #a ", " #t " \n\t" \ "movq " #a ", " #t " \n\t" \
"psrlq $32, " #a " \n\t" \ "psrlq $32, " #a " \n\t" \
...@@ -854,8 +820,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, ...@@ -854,8 +820,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->try_8x8basis = try_8x8basis_mmx; c->try_8x8basis = try_8x8basis_mmx;
} }
c->add_8x8basis = add_8x8basis_mmx; c->add_8x8basis = add_8x8basis_mmx;
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
} }
if (INLINE_AMD3DNOW(cpu_flags)) { if (INLINE_AMD3DNOW(cpu_flags)) {
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/svq1enc.h"
#if HAVE_INLINE_ASM
/**
 * MMX version of the sum of squared differences between a signed 8-bit
 * array and a signed 16-bit array.
 *
 * The loop walks the arrays from the end towards the start, 8 elements per
 * iteration, and always executes at least once.
 * NOTE(review): size is presumably required to be a positive multiple of 8;
 * confirm against the callers.
 * NOTE(review): the asm statement declares no clobber list although it
 * writes mm0-mm4 and the flags, and reads memory through pix1/pix2 --
 * confirm this is acceptable for the supported compilers.
 *
 * @param pix1 first input, int8_t elements
 * @param pix2 second input, int16_t elements
 * @param size number of elements to compare
 * @return sum of (pix1[i] - pix2[i])^2, accumulated in 32 bits
 */
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
int size)
{
int sum;
x86_reg i = size;
__asm__ volatile (
/* mm4 = running pair of 32-bit partial sums */
"pxor %%mm4, %%mm4 \n"
"1: \n"
/* step the element index down by 8; jg below tests the flags set here */
"sub $8, %0 \n"
/* mm2 = 8 bytes of pix1, mm0/mm1 = the matching 8 words of pix2 */
"movq (%2, %0), %%mm2 \n"
"movq (%3, %0, 2), %%mm0 \n"
"movq 8(%3, %0, 2), %%mm1 \n"
/* put each pix1 byte into the high byte of a word, then shift right
 * arithmetically by 8 to get sign-extended 16-bit values; the previous
 * contents of mm3 end up in the low bytes and are shifted out */
"punpckhbw %%mm2, %%mm3 \n"
"punpcklbw %%mm2, %%mm2 \n"
"psraw $8, %%mm3 \n"
"psraw $8, %%mm2 \n"
/* differences, then square and add adjacent pairs into dwords */
"psubw %%mm3, %%mm1 \n"
"psubw %%mm2, %%mm0 \n"
"pmaddwd %%mm1, %%mm1 \n"
"pmaddwd %%mm0, %%mm0 \n"
"paddd %%mm1, %%mm4 \n"
"paddd %%mm0, %%mm4 \n"
/* loop while the index left by "sub" is still positive */
"jg 1b \n"
/* add the upper dword of mm4 to the lower one and return it */
"movq %%mm4, %%mm3 \n"
"psrlq $32, %%mm3 \n"
"paddd %%mm3, %%mm4 \n"
"movd %%mm4, %1 \n"
: "+r" (i), "=r" (sum)
: "r" (pix1), "r" (pix2));
return sum;
}
#endif /* HAVE_INLINE_ASM */
/**
 * Install x86-optimized routines into an SVQ1 encoder context.
 *
 * With inline assembly enabled and MMX reported by av_get_cpu_flags(),
 * c->ssd_int8_vs_int16 is pointed at the MMX implementation; otherwise the
 * context is left unchanged.
 *
 * @param c encoder context whose function pointers may be overridden
 */
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
{
#if HAVE_INLINE_ASM
    const int flags = av_get_cpu_flags();

    if (!INLINE_MMX(flags))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
#endif /* HAVE_INLINE_ASM */
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment