Commit 363bd1c6 authored by Janne Grunau's avatar Janne Grunau

remove iwmmxt optimizations

The were broken since August of 2010 without anyone noticing until
three weeks ago. Nobody cares about it anymore and hopefully Marvell
will support NEON like in the PXA978 from now on.
parent 33c5c3ad
......@@ -234,7 +234,6 @@ Advanced options (experts only):
--disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations
--disable-armvfp disable ARM VFP optimizations
--disable-iwmmxt disable iwmmxt optimizations
--disable-mmi disable MMI optimizations
--disable-neon disable NEON optimizations
--disable-vis disable VIS optimizations
......@@ -1032,7 +1031,6 @@ ARCH_EXT_LIST='
armv6t2
armvfp
avx
iwmmxt
mmi
mmx
mmx2
......@@ -1229,7 +1227,6 @@ armv5te_deps="arm"
armv6_deps="arm"
armv6t2_deps="arm"
armvfp_deps="arm"
iwmmxt_deps="arm"
neon_deps="arm"
vfpv3_deps="armvfp"
......@@ -2690,7 +2687,6 @@ EOF
enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"'
enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"'
enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"'
enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"'
enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"'
enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"'
......@@ -3199,7 +3195,6 @@ if enabled arm; then
echo "ARMv6 enabled ${armv6-no}"
echo "ARMv6T2 enabled ${armv6t2-no}"
echo "ARM VFP enabled ${armvfp-no}"
echo "IWMMXT enabled ${iwmmxt-no}"
echo "NEON enabled ${neon-no}"
fi
if enabled mips; then
......
......@@ -43,9 +43,6 @@ OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
arm/dsputil_init_vfp.o \
$(VFP-OBJS-yes)
OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \
arm/mpegvideo_iwmmxt.o \
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o \
......
......@@ -28,6 +28,5 @@ void ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
#endif /* AVCODEC_ARM_DSPUTIL_H */
......@@ -119,7 +119,6 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx);
if (HAVE_IWMMXT) ff_dsputil_init_iwmmxt(c, avctx);
if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx);
if (HAVE_NEON) ff_dsputil_init_neon(c, avctx);
}
/*
* iWMMXt optimized DSP utils
* Copyright (c) 2004 AGAWA Koji
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavcodec/dsputil.h"
#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2b"
#include "dsputil_iwmmxt_rnd_template.c"
#undef DEF
#undef SET_RND
#undef WAVG2B
#define DEF(x, y) x ## _ ## y ##_iwmmxt
#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2br"
#include "dsputil_iwmmxt_rnd_template.c"
#undef DEF
#undef SET_RND
#undef WAVG2BR
// need scheduling
#define OP(AVG) \
__asm__ volatile ( \
/* alignment */ \
"and r12, %[pixels], #7 \n\t" \
"bic %[pixels], %[pixels], #7 \n\t" \
"tmcr wcgr1, r12 \n\t" \
\
"wldrd wr0, [%[pixels]] \n\t" \
"wldrd wr1, [%[pixels], #8] \n\t" \
"add %[pixels], %[pixels], %[line_size] \n\t" \
"walignr1 wr4, wr0, wr1 \n\t" \
\
"1: \n\t" \
\
"wldrd wr2, [%[pixels]] \n\t" \
"wldrd wr3, [%[pixels], #8] \n\t" \
"add %[pixels], %[pixels], %[line_size] \n\t" \
"pld [%[pixels]] \n\t" \
"walignr1 wr5, wr2, wr3 \n\t" \
AVG " wr6, wr4, wr5 \n\t" \
"wstrd wr6, [%[block]] \n\t" \
"add %[block], %[block], %[line_size] \n\t" \
\
"wldrd wr0, [%[pixels]] \n\t" \
"wldrd wr1, [%[pixels], #8] \n\t" \
"add %[pixels], %[pixels], %[line_size] \n\t" \
"walignr1 wr4, wr0, wr1 \n\t" \
"pld [%[pixels]] \n\t" \
AVG " wr6, wr4, wr5 \n\t" \
"wstrd wr6, [%[block]] \n\t" \
"add %[block], %[block], %[line_size] \n\t" \
\
"subs %[h], %[h], #2 \n\t" \
"bne 1b \n\t" \
: [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
: [line_size]"r"(line_size) \
: "memory", "r12");
void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
OP("wavg2br");
}
void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
OP("wavg2b");
}
#undef OP
void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
{
uint8_t *pixels2 = pixels + line_size;
__asm__ volatile (
"mov r12, #4 \n\t"
"1: \n\t"
"pld [%[pixels], %[line_size2]] \n\t"
"pld [%[pixels2], %[line_size2]] \n\t"
"wldrd wr4, [%[pixels]] \n\t"
"wldrd wr5, [%[pixels2]] \n\t"
"pld [%[block], #32] \n\t"
"wunpckelub wr6, wr4 \n\t"
"wldrd wr0, [%[block]] \n\t"
"wunpckehub wr7, wr4 \n\t"
"wldrd wr1, [%[block], #8] \n\t"
"wunpckelub wr8, wr5 \n\t"
"wldrd wr2, [%[block], #16] \n\t"
"wunpckehub wr9, wr5 \n\t"
"wldrd wr3, [%[block], #24] \n\t"
"add %[block], %[block], #32 \n\t"
"waddhss wr10, wr0, wr6 \n\t"
"waddhss wr11, wr1, wr7 \n\t"
"waddhss wr12, wr2, wr8 \n\t"
"waddhss wr13, wr3, wr9 \n\t"
"wpackhus wr14, wr10, wr11 \n\t"
"wpackhus wr15, wr12, wr13 \n\t"
"wstrd wr14, [%[pixels]] \n\t"
"add %[pixels], %[pixels], %[line_size2] \n\t"
"subs r12, r12, #1 \n\t"
"wstrd wr15, [%[pixels2]] \n\t"
"add %[pixels2], %[pixels2], %[line_size2] \n\t"
"bne 1b \n\t"
: [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
: [line_size2]"r"(line_size << 1)
: "cc", "memory", "r12");
}
static void clear_blocks_iwmmxt(DCTELEM *blocks)
{
__asm__ volatile(
"wzero wr0 \n\t"
"mov r1, #(128 * 6 / 32) \n\t"
"1: \n\t"
"wstrd wr0, [%0] \n\t"
"wstrd wr0, [%0, #8] \n\t"
"wstrd wr0, [%0, #16] \n\t"
"wstrd wr0, [%0, #24] \n\t"
"subs r1, r1, #1 \n\t"
"add %0, %0, #32 \n\t"
"bne 1b \n\t"
: "+r"(blocks)
:
: "r1"
);
}
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
return;
}
/* A run time test is not simple. If this file is compiled in
* then we should install the functions
*/
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
mm_flags |= (avctx->dsp_mask & 0xffff);
else
mm_flags &= ~(avctx->dsp_mask & 0xffff);
}
if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return;
c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
if (!high_bit_depth) {
c->clear_blocks = clear_blocks_iwmmxt;
c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
}
}
This diff is collapsed.
......@@ -40,16 +40,9 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block,
void ff_MPV_common_init_arm(MpegEncContext *s)
{
/* IWMMXT support is a superset of armv5te, so
* allow optimized functions for armv5te unless
* a better iwmmxt function exists
*/
#if HAVE_ARMV5TE
ff_MPV_common_init_armv5te(s);
#endif
#if HAVE_IWMMXT
ff_MPV_common_init_iwmmxt(s);
#endif
if (HAVE_NEON) {
s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon;
......
......@@ -21,7 +21,6 @@
#include "libavcodec/mpegvideo.h"
void ff_MPV_common_init_iwmmxt(MpegEncContext *s);
void ff_MPV_common_init_armv5te(MpegEncContext *s);
#endif /* AVCODEC_ARM_MPEGVIDEO_H */
/*
* copyright (c) 2004 AGAWA Koji
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
#include "mpegvideo_arm.h"
static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
DCTELEM *block, int n, int qscale)
{
int level, qmul, qadd;
int nCoeffs;
DCTELEM *block_orig = block;
assert(s->block_last_index[n]>=0);
qmul = qscale << 1;
if (!s->h263_aic) {
if (n < 4)
level = block[0] * s->y_dc_scale;
else
level = block[0] * s->c_dc_scale;
qadd = (qscale - 1) | 1;
}else{
qadd = 0;
level = block[0];
}
if(s->ac_pred)
nCoeffs=63;
else
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
__asm__ volatile (
/* "movd %1, %%mm6 \n\t" //qmul */
/* "packssdw %%mm6, %%mm6 \n\t" */
/* "packssdw %%mm6, %%mm6 \n\t" */
"tbcsth wr6, %[qmul] \n\t"
/* "movd %2, %%mm5 \n\t" //qadd */
/* "packssdw %%mm5, %%mm5 \n\t" */
/* "packssdw %%mm5, %%mm5 \n\t" */
"tbcsth wr5, %[qadd] \n\t"
"wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
"wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
"wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
"1: \n\t"
"wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
"wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
"wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
"wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
/* "movq (%0, %3), %%mm2 \n\t" */
/* "movq 8(%0, %3), %%mm3 \n\t" */
"wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
"wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
"wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
"wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
"waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
"waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
"wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
"wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
"wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
"wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
"wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
"wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
"wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
"wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
"add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
"subs %[i], %[i], #1 \n\t"
"bne 1b \n\t" /* "jng 1b \n\t" */
:[block]"+r"(block)
:[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd)
:"memory");
block_orig[0] = level;
}
void ff_MPV_common_init_iwmmxt(MpegEncContext *s)
{
if (!(mm_flags & AV_CPU_FLAG_IWMMXT)) return;
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
}
......@@ -72,7 +72,6 @@ OBJS = adler32.o \
tree.o \
utils.o \
OBJS-$(ARCH_ARM) += arm/cpu.o
OBJS-$(ARCH_PPC) += ppc/cpu.o
OBJS-$(ARCH_X86) += x86/cpu.o
......
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "config.h"
int ff_get_cpu_flags_arm(void)
{
return HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT;
}
......@@ -28,7 +28,6 @@ int av_get_cpu_flags(void)
if (checked)
return flags;
if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
if (ARCH_X86) flags = ff_get_cpu_flags_x86();
......@@ -53,9 +52,7 @@ static const struct {
int flag;
const char *name;
} cpu_flag_tab[] = {
#if ARCH_ARM
{ AV_CPU_FLAG_IWMMXT, "iwmmxt" },
#elif ARCH_PPC
#if ARCH_PPC
{ AV_CPU_FLAG_ALTIVEC, "altivec" },
#elif ARCH_X86
{ AV_CPU_FLAG_MMX, "mmx" },
......
......@@ -40,7 +40,6 @@
#define AV_CPU_FLAG_AVX 0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
/**
......@@ -57,7 +56,6 @@ int av_get_cpu_flags(void);
void av_set_cpu_flags_mask(int mask);
/* The following CPU-specific functions shall not be called directly. */
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment