Commit b25a265a authored by Michael Niedermayer's avatar Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  pcm-mpeg: convert to bytestream2 API
  Revert "h264: clear trailing bits in partially parsed NAL units"
  remove iwmmxt optimizations
  mimic: do not continue if swap_buf_size is 0
  mimic: convert to bytestream2 API
  frwu: use MKTAG to check marker instead of AV_RL32
  txd: port to bytestream2 API
  c93: convert to bytestream2 API
  iff: make .long_name more descriptive
  FATE: add test for cdxl demuxer
  rtsp: Fix a typo

Conflicts:
	libavcodec/arm/dsputil_iwmmxt.c
	libavcodec/arm/dsputil_iwmmxt_rnd_template.c
	libavcodec/arm/mpegvideo_iwmmxt.c
	libavcodec/c93.c
	libavcodec/txd.c
	libavutil/arm/cpu.c
	tests/fate/demux.mak
Merged-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parents 2d38081b bd3e07c8
...@@ -251,7 +251,6 @@ Advanced options (experts only): ...@@ -251,7 +251,6 @@ Advanced options (experts only):
--disable-armv6 disable armv6 optimizations --disable-armv6 disable armv6 optimizations
--disable-armv6t2 disable armv6t2 optimizations --disable-armv6t2 disable armv6t2 optimizations
--disable-armvfp disable ARM VFP optimizations --disable-armvfp disable ARM VFP optimizations
--disable-iwmmxt disable iwmmxt optimizations
--disable-mmi disable MMI optimizations --disable-mmi disable MMI optimizations
--disable-neon disable NEON optimizations --disable-neon disable NEON optimizations
--disable-vis disable VIS optimizations --disable-vis disable VIS optimizations
...@@ -1132,7 +1131,6 @@ ARCH_EXT_LIST=' ...@@ -1132,7 +1131,6 @@ ARCH_EXT_LIST='
armv6t2 armv6t2
armvfp armvfp
avx avx
iwmmxt
mmi mmi
mmx mmx
mmx2 mmx2
...@@ -1344,7 +1342,6 @@ armv5te_deps="arm" ...@@ -1344,7 +1342,6 @@ armv5te_deps="arm"
armv6_deps="arm" armv6_deps="arm"
armv6t2_deps="arm" armv6t2_deps="arm"
armvfp_deps="arm" armvfp_deps="arm"
iwmmxt_deps="arm"
neon_deps="arm" neon_deps="arm"
vfpv3_deps="armvfp" vfpv3_deps="armvfp"
...@@ -2884,7 +2881,6 @@ EOF ...@@ -2884,7 +2881,6 @@ EOF
enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"' enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"'
enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"' enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"'
enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"' enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"'
enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"'
enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"' enabled neon && check_asm neon '"vadd.i16 q0, q0, q0"'
enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"' enabled vfpv3 && check_asm vfpv3 '"vmov.f32 s0, #1.0"'
...@@ -3489,7 +3485,6 @@ if enabled arm; then ...@@ -3489,7 +3485,6 @@ if enabled arm; then
echo "ARMv6 enabled ${armv6-no}" echo "ARMv6 enabled ${armv6-no}"
echo "ARMv6T2 enabled ${armv6t2-no}" echo "ARMv6T2 enabled ${armv6t2-no}"
echo "ARM VFP enabled ${armvfp-no}" echo "ARM VFP enabled ${armvfp-no}"
echo "IWMMXT enabled ${iwmmxt-no}"
echo "NEON enabled ${neon-no}" echo "NEON enabled ${neon-no}"
fi fi
if enabled mips; then if enabled mips; then
......
...@@ -44,9 +44,6 @@ OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \ ...@@ -44,9 +44,6 @@ OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
arm/dsputil_init_vfp.o \ arm/dsputil_init_vfp.o \
$(VFP-OBJS-yes) $(VFP-OBJS-yes)
OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \
arm/mpegvideo_iwmmxt.o \
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o \ arm/fft_fixed_neon.o \
......
...@@ -28,6 +28,5 @@ void ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx); ...@@ -28,6 +28,5 @@ void ff_dsputil_init_armv5te(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
#endif /* AVCODEC_ARM_DSPUTIL_H */ #endif /* AVCODEC_ARM_DSPUTIL_H */
...@@ -119,7 +119,6 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) ...@@ -119,7 +119,6 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx); if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx); if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx);
if (HAVE_IWMMXT) ff_dsputil_init_iwmmxt(c, avctx);
if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx); if (HAVE_ARMVFP) ff_dsputil_init_vfp(c, avctx);
if (HAVE_NEON) ff_dsputil_init_neon(c, avctx); if (HAVE_NEON) ff_dsputil_init_neon(c, avctx);
} }
/*
* iWMMXt optimized DSP utils
* Copyright (c) 2004 AGAWA Koji
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavcodec/dsputil.h"
/*
 * Instantiate the pixel-operation template twice.
 *
 * Each pass defines three macros, includes the template, and undefines them:
 *   DEF(x, y)  - builds the function name (x_no_rnd_y_iwmmxt / x_y_iwmmxt)
 *   SET_RND(r) - separate asm statement that moves a constant (1 for the
 *                no_rnd pass, 2 for the rounding pass) into r12 and
 *                broadcasts it into wR register r with tbcsth
 *   WAVG2B     - averaging mnemonic used by the template
 *                ("wavg2b" for no_rnd, "wavg2br" for the rounding pass)
 */
#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2b"
#include "dsputil_iwmmxt_rnd_template.c"
#undef DEF
#undef SET_RND
#undef WAVG2B
#define DEF(x, y) x ## _ ## y ##_iwmmxt
#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
#define WAVG2B "wavg2br"
#include "dsputil_iwmmxt_rnd_template.c"
#undef DEF
#undef SET_RND
/* Undefine the macro that was actually defined above (WAVG2B, not WAVG2BR). */
#undef WAVG2B
/*
 * OP(AVG): shared body of the two 8-byte-wide vertical half-pel "put"
 * functions below. AVG is the averaging mnemonic to emit.
 *
 * The expansion expects the locals block, pixels, line_size and h to be in
 * scope. It loads one source row before the loop and two more per iteration,
 * and stores AVG(previous row, current row) to block for each output row.
 * h is decremented by 2 per iteration and the loop ends only when it reaches
 * exactly zero, so h must be a positive even number.
 *
 * The asm executes "subs", so the condition flags are listed as clobbered
 * ("cc") in addition to memory and the scratch register r12.
 */
// need scheduling
#define OP(AVG) \
    __asm__ volatile ( \
        /* alignment: wcgr1 = pixels & 7, then round pixels down to 8 bytes */ \
        "and r12, %[pixels], #7 \n\t" \
        "bic %[pixels], %[pixels], #7 \n\t" \
        "tmcr wcgr1, r12 \n\t" \
        \
        /* first source row -> wr4 */ \
        "wldrd wr0, [%[pixels]] \n\t" \
        "wldrd wr1, [%[pixels], #8] \n\t" \
        "add %[pixels], %[pixels], %[line_size] \n\t" \
        "walignr1 wr4, wr0, wr1 \n\t" \
        \
        "1: \n\t" \
        \
        /* next source row -> wr5, store AVG(wr4, wr5) */ \
        "wldrd wr2, [%[pixels]] \n\t" \
        "wldrd wr3, [%[pixels], #8] \n\t" \
        "add %[pixels], %[pixels], %[line_size] \n\t" \
        "pld [%[pixels]] \n\t" \
        "walignr1 wr5, wr2, wr3 \n\t" \
        AVG " wr6, wr4, wr5 \n\t" \
        "wstrd wr6, [%[block]] \n\t" \
        "add %[block], %[block], %[line_size] \n\t" \
        \
        /* following source row -> wr4, store AVG(wr4, wr5) */ \
        "wldrd wr0, [%[pixels]] \n\t" \
        "wldrd wr1, [%[pixels], #8] \n\t" \
        "add %[pixels], %[pixels], %[line_size] \n\t" \
        "walignr1 wr4, wr0, wr1 \n\t" \
        "pld [%[pixels]] \n\t" \
        AVG " wr6, wr4, wr5 \n\t" \
        "wstrd wr6, [%[block]] \n\t" \
        "add %[block], %[block], %[line_size] \n\t" \
        \
        "subs %[h], %[h], #2 \n\t" \
        "bne 1b \n\t" \
        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
        : [line_size]"r"(line_size) \
        : "cc", "memory", "r12");

/*
 * Vertical half-pel put, 8 bytes per row, using the "wavg2br" mnemonic.
 *
 * block:     destination, advanced by line_size per row
 * pixels:    source (any byte alignment), h + 1 rows are read
 * line_size: byte distance between rows for both source and destination
 * h:         number of output rows; must be positive and even
 */
void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2br");
}

/*
 * Same as put_pixels8_y2_iwmmxt but using the "wavg2b" mnemonic
 * (the non-rounding flavour, per the no_rnd naming).
 */
void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2b");
}
#undef OP
/*
 * Add a block of coefficients to pixels in place, with saturation.
 *
 * block:     source coefficients; 32 bytes are consumed per loop iteration
 * pixels:    destination pixels, read and written 8 bytes per row
 * line_size: byte distance between pixel rows
 *
 * The loop runs 4 times (counter in r12) and handles two rows per pass
 * (pixels and pixels2 = pixels + line_size), i.e. 8 rows of 8 bytes.
 */
void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
{
/* second row pointer; both pointers then advance by two rows at a time */
uint8_t *pixels2 = pixels + line_size;
__asm__ volatile (
"mov r12, #4 \n\t"
"1: \n\t"
"pld [%[pixels], %[line_size2]] \n\t"
"pld [%[pixels2], %[line_size2]] \n\t"
/* load the two pixel rows and widen their bytes to halfwords */
"wldrd wr4, [%[pixels]] \n\t"
"wldrd wr5, [%[pixels2]] \n\t"
"pld [%[block], #32] \n\t"
"wunpckelub wr6, wr4 \n\t"
"wldrd wr0, [%[block]] \n\t"
"wunpckehub wr7, wr4 \n\t"
"wldrd wr1, [%[block], #8] \n\t"
"wunpckelub wr8, wr5 \n\t"
"wldrd wr2, [%[block], #16] \n\t"
"wunpckehub wr9, wr5 \n\t"
"wldrd wr3, [%[block], #24] \n\t"
"add %[block], %[block], #32 \n\t"
/* halfword adds of coefficients and widened pixels (saturating, per the mnemonic) */
"waddhss wr10, wr0, wr6 \n\t"
"waddhss wr11, wr1, wr7 \n\t"
"waddhss wr12, wr2, wr8 \n\t"
"waddhss wr13, wr3, wr9 \n\t"
/* pack the halfword sums back down to bytes and store both rows */
"wpackhus wr14, wr10, wr11 \n\t"
"wpackhus wr15, wr12, wr13 \n\t"
"wstrd wr14, [%[pixels]] \n\t"
"add %[pixels], %[pixels], %[line_size2] \n\t"
"subs r12, r12, #1 \n\t"
"wstrd wr15, [%[pixels2]] \n\t"
"add %[pixels2], %[pixels2], %[line_size2] \n\t"
"bne 1b \n\t"
: [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
: [line_size2]"r"(line_size << 1)
: "cc", "memory", "r12");
}
/*
 * Zero a contiguous run of coefficient blocks.
 *
 * blocks: start of the area to clear. The loop runs 128 * 6 / 32 = 24 times
 *         and writes 32 zero bytes per pass, i.e. 768 bytes in total
 *         (presumably six 64-coefficient blocks of 16-bit DCTELEM - confirm
 *         against the DCTELEM definition).
 *
 * The asm stores to memory and executes "subs", so both "memory" and "cc"
 * are declared as clobbered alongside the loop counter r1; without "memory"
 * the compiler is not told that the buffer contents change.
 */
static void clear_blocks_iwmmxt(DCTELEM *blocks)
{
    __asm__ volatile(
        "wzero wr0 \n\t"
        "mov r1, #(128 * 6 / 32) \n\t"
        "1: \n\t"
        "wstrd wr0, [%0] \n\t"
        "wstrd wr0, [%0, #8] \n\t"
        "wstrd wr0, [%0, #16] \n\t"
        "wstrd wr0, [%0, #24] \n\t"
        "subs r1, r1, #1 \n\t"
        "add %0, %0, #32 \n\t"
        "bne 1b \n\t"
        : "+r"(blocks)
        :
        : "cc", "memory", "r1"
    );
}
/*
 * Placeholder with the pixel-operation signature: it ignores every argument
 * and leaves both buffers untouched.
 */
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    (void)block;
    (void)pixels;
    (void)line_size;
    (void)h;
}
/*
 * Install the iWMMXt routines into a DSPContext.
 *
 * No run-time CPU probe is performed (it is not simple): being compiled in
 * is taken as proof that iWMMXt is available. avctx->dsp_mask can still
 * switch the routines off, or force additional flags on when
 * AV_CPU_FLAG_FORCE is set in it.
 *
 * c:     context whose function pointers are overwritten
 * avctx: supplies dsp_mask and bits_per_raw_sample
 */
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
    int cpu_caps = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */

    /* A zero dsp_mask leaves cpu_caps unchanged on the masking path. */
    if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
        cpu_caps |= (avctx->dsp_mask & 0xffff);
    else
        cpu_caps &= ~(avctx->dsp_mask & 0xffff);

    if (!(cpu_caps & AV_CPU_FLAG_IWMMXT))
        return;

    /* Installed regardless of bit depth. */
    c->add_pixels_clamped = add_pixels_clamped_iwmmxt;

    /* Everything below is only installed when bits_per_raw_sample <= 8. */
    if (avctx->bits_per_raw_sample > 8)
        return;

    c->clear_blocks = clear_blocks_iwmmxt;

    /* put, 16 wide */
    c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
    c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
    c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
    c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;

    /* put, 8 wide */
    c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
    c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
    c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
    c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;

    /* put without rounding; entry 0 reuses the plain put routine */
    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;

    /* avg, 16 wide */
    c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;

    /* avg, 8 wide */
    c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
    c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
    c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;

    /* avg without rounding */
    c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
}
/*
* iWMMXt optimized DSP utils
* copyright (c) 2004 AGAWA Koji
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Copy h rows of 8 bytes from pixels to block.
 *
 * block:     destination; written with 8-byte wstrd stores
 * pixels:    source, any byte alignment; each row is fetched as two 8-byte
 *            loads from the address rounded down to 8 and realigned through
 *            wcgr1 (= pixels & 7)
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows are handled per iteration and the loop ends
 *            only when h reaches exactly zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered together with
 * memory and the scratch registers r4, r5 and r12.
 */
void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    __asm__ volatile (
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        /* r4 / r5: source / destination pointers for the odd rows */
        "add r4, %[pixels], %[line_size] \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr0, [%[pixels]] \n\t"
        /* flags set here are consumed by the bne at the bottom of the loop */
        "subs %[h], %[h], #2 \n\t"
        "wldrd wr1, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr3, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wldrd wr4, [r4, #8] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr8, wr0, wr1 \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr10, wr3, wr4 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr10, [r5] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "bne 1b \n\t"
        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
        :
        : "cc", "memory", "r4", "r5", "r12");
}
/*
 * Average h rows of 8 source bytes into block: each stored row is
 * WAVG2B(source row, existing block row).
 *
 * block:     destination; read and written 8 bytes per row
 * pixels:    source, any byte alignment (realigned through wcgr1)
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered together with
 * memory and the scratch registers r4, r5 and r12.
 */
void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    __asm__ volatile (
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        /* r4 / r5: source / destination pointers for the odd rows */
        "add r4, %[pixels], %[line_size] \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr0, [%[pixels]] \n\t"
        /* flags set here are consumed by the bne at the bottom of the loop */
        "subs %[h], %[h], #2 \n\t"
        "wldrd wr1, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr3, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wldrd wr4, [r4, #8] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr8, wr0, wr1 \n\t"
        /* wr0 / wr2 are reused for the current destination rows */
        "wldrd wr0, [%[block]] \n\t"
        "wldrd wr2, [r5] \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr10, wr3, wr4 \n\t"
        WAVG2B" wr8, wr8, wr0 \n\t"
        WAVG2B" wr10, wr10, wr2 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr10, [r5] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "bne 1b \n\t"
        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
        :
        : "cc", "memory", "r4", "r5", "r12");
}
/*
 * Copy h rows of 16 bytes from pixels to block.
 *
 * block:     destination; written as two 8-byte stores per row
 * pixels:    source, any byte alignment; each row is fetched as three 8-byte
 *            loads from the address rounded down to 8 and realigned through
 *            wcgr1 (= pixels & 7)
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered together with
 * memory and the scratch registers r4, r5 and r12.
 */
void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    __asm__ volatile (
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        /* r4 / r5: source / destination pointers for the odd rows */
        "add r4, %[pixels], %[line_size] \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr0, [%[pixels]] \n\t"
        "wldrd wr1, [%[pixels], #8] \n\t"
        /* flags set here are consumed by the bne at the bottom of the loop */
        "subs %[h], %[h], #2 \n\t"
        "wldrd wr2, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr3, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr8, wr0, wr1 \n\t"
        "wldrd wr4, [r4, #8] \n\t"
        "walignr1 wr9, wr1, wr2 \n\t"
        "wldrd wr5, [r4, #16] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr10, wr3, wr4 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "walignr1 wr11, wr4, wr5 \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr10, [r5] \n\t"
        "wstrd wr11, [r5, #8] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "bne 1b \n\t"
        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
        :
        : "cc", "memory", "r4", "r5", "r12");
}
/*
 * Average h rows of 16 source bytes into block: each stored 8-byte half row
 * is WAVG2B(source, existing block contents).
 *
 * block:     destination; read and written 16 bytes per row
 * pixels:    source, any byte alignment (three 8-byte loads per row,
 *            realigned through wcgr1)
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered together with
 * memory and the scratch registers r4, r5 and r12.
 */
void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    __asm__ volatile (
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        /* r4 / r5: source / destination pointers for the odd rows */
        "add r4, %[pixels], %[line_size]\n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr0, [%[pixels]] \n\t"
        "wldrd wr1, [%[pixels], #8] \n\t"
        /* flags set here are consumed by the bne at the bottom of the loop */
        "subs %[h], %[h], #2 \n\t"
        "wldrd wr2, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr3, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr8, wr0, wr1 \n\t"
        "wldrd wr4, [r4, #8] \n\t"
        "walignr1 wr9, wr1, wr2 \n\t"
        "wldrd wr5, [r4, #16] \n\t"
        "add r4, r4, %[line_size] \n\t"
        /* wr0..wr3 are reused for the current destination contents */
        "wldrd wr0, [%[block]] \n\t"
        "pld [r4] \n\t"
        "wldrd wr1, [%[block], #8] \n\t"
        "pld [r4, #32] \n\t"
        "wldrd wr2, [r5] \n\t"
        "walignr1 wr10, wr3, wr4 \n\t"
        "wldrd wr3, [r5, #8] \n\t"
        WAVG2B" wr8, wr8, wr0 \n\t"
        WAVG2B" wr9, wr9, wr1 \n\t"
        WAVG2B" wr10, wr10, wr2 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "walignr1 wr11, wr4, wr5 \n\t"
        WAVG2B" wr11, wr11, wr3 \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr10, [r5] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "wstrd wr11, [r5, #8] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "bne 1b \n\t"
        : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
        :
        : "cc", "memory", "r4", "r5", "r12");
}
/*
 * Horizontal half-pel put, 8 bytes per row: each stored row is
 * WAVG2B(source row at byte offset 0, same source row at byte offset +1).
 *
 * block:     destination; written 8 bytes per row
 * pixels:    source, any byte alignment; 16 bytes are fetched per row from
 *            the address rounded down to 8
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "cmp" and "subs", so "cc" is declared clobbered together
 * with memory and the scratch registers r4, r5 and r12.
 */
void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    // wr0 / wr2: even / odd source row at the original alignment
    // wr4 / wr6: even / odd source row shifted by one more byte
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version (wr15 is not read by the asm below)
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        /* wcgr1 = pixels & 7, wcgr2 = (pixels & 7) + 1; r12 keeps the latter */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "add r4, %[pixels], %[line_size]\n\t"
        "tmcr wcgr2, r12 \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr10, [%[pixels]] \n\t"
        /* redone every iteration because the subs below overwrites the flags */
        "cmp r12, #8 \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr13, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "wldrd wr14, [r4, #8] \n\t"
        "pld [%[pixels], #32] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr2, wr13, wr14 \n\t"
        /* offset + 1 == 8: the shifted row is exactly the second loaded word */
        "wmoveq wr4, wr11 \n\t"
        "wmoveq wr6, wr14 \n\t"
        "walignr2ne wr4, wr10, wr11 \n\t"
        "walignr2ne wr6, wr13, wr14 \n\t"
        WAVG2B" wr0, wr0, wr4 \n\t"
        WAVG2B" wr2, wr2, wr6 \n\t"
        "wstrd wr0, [%[block]] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "wstrd wr2, [r5] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Horizontal half-pel put, 16 bytes per row: each stored 8-byte half row is
 * WAVG2B(source at byte offset 0, source at byte offset +1).
 *
 * block:     destination; written as two 8-byte stores per row
 * pixels:    source, any byte alignment; 24 bytes are fetched per row from
 *            the address rounded down to 8
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "cmp" and "subs", so "cc" is declared clobbered together
 * with memory and the scratch registers r4, r5 and r12.
 */
void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    // [wr0 wr1] / [wr2 wr3]: even / odd source row at the original alignment
    // [wr4 wr5] / [wr6 wr7]: even / odd source row shifted by one more byte
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version (wr15 is reloaded from memory below before any use)
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        /* wcgr1 = pixels & 7, wcgr2 = (pixels & 7) + 1; r12 keeps the latter */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "add r4, %[pixels], %[line_size]\n\t"
        "tmcr wcgr2, r12 \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "1: \n\t"
        "wldrd wr10, [%[pixels]] \n\t"
        /* redone every iteration because the subs below overwrites the flags */
        "cmp r12, #8 \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr13, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "wldrd wr14, [r4, #8] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wldrd wr15, [r4, #16] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        "walignr1 wr2, wr13, wr14 \n\t"
        "walignr1 wr3, wr14, wr15 \n\t"
        /* offset + 1 == 8: the shifted data is exactly the following loaded word */
        "wmoveq wr4, wr11 \n\t"
        "wmoveq wr5, wr12 \n\t"
        "wmoveq wr6, wr14 \n\t"
        "wmoveq wr7, wr15 \n\t"
        "walignr2ne wr4, wr10, wr11 \n\t"
        "walignr2ne wr5, wr11, wr12 \n\t"
        "walignr2ne wr6, wr13, wr14 \n\t"
        "walignr2ne wr7, wr14, wr15 \n\t"
        WAVG2B" wr0, wr0, wr4 \n\t"
        WAVG2B" wr1, wr1, wr5 \n\t"
        "wstrd wr0, [%[block]] \n\t"
        WAVG2B" wr2, wr2, wr6 \n\t"
        "wstrd wr1, [%[block], #8] \n\t"
        WAVG2B" wr3, wr3, wr7 \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr2, [r5] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "wstrd wr3, [r5, #8] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Horizontal half-pel avg, 8 bytes per row: the source row is first combined
 * with itself shifted by one byte using WAVG2B, and that result is then
 * combined with the existing block row using WAVG2B again before storing.
 *
 * block:     destination; read and written 8 bytes per row
 * pixels:    source, any byte alignment; 16 bytes are fetched per row from
 *            the address rounded down to 8
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "cmp" and "subs", so "cc" is declared clobbered together
 * with memory and the scratch registers r4, r5 and r12.
 */
void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    // wr0 / wr2: even / odd source row at the original alignment
    // wr4 / wr6: even / odd source row shifted by one more byte
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version (wr15 is not read by the asm below)
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        /* wcgr1 = pixels & 7, wcgr2 = (pixels & 7) + 1; r12 keeps the latter */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "add r4, %[pixels], %[line_size]\n\t"
        "tmcr wcgr2, r12 \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "1: \n\t"
        "wldrd wr10, [%[pixels]] \n\t"
        /* redone every iteration because the subs below overwrites the flags */
        "cmp r12, #8 \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr13, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "wldrd wr14, [r4, #8] \n\t"
        "pld [%[pixels], #32] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr2, wr13, wr14 \n\t"
        /* offset + 1 == 8: the shifted row is exactly the second loaded word */
        "wmoveq wr4, wr11 \n\t"
        "wmoveq wr6, wr14 \n\t"
        "walignr2ne wr4, wr10, wr11 \n\t"
        /* wr10 / wr12 are reused for the current destination rows */
        "wldrd wr10, [%[block]] \n\t"
        "walignr2ne wr6, wr13, wr14 \n\t"
        "wldrd wr12, [r5] \n\t"
        WAVG2B" wr0, wr0, wr4 \n\t"
        WAVG2B" wr2, wr2, wr6 \n\t"
        WAVG2B" wr0, wr0, wr10 \n\t"
        WAVG2B" wr2, wr2, wr12 \n\t"
        "wstrd wr0, [%[block]] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "wstrd wr2, [r5] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Horizontal half-pel avg, 16 bytes per row: the source is first combined
 * with itself shifted by one byte using WAVG2B, and that result is then
 * combined with the existing block contents using WAVG2B again.
 *
 * block:     destination; read and written 16 bytes per row
 * pixels:    source, any byte alignment; 24 bytes are fetched per row from
 *            the address rounded down to 8
 * line_size: byte distance between rows for both buffers
 * h:         row count; two rows per iteration, loop ends only at exactly
 *            zero, so h must be positive and even
 *
 * The asm executes "cmp" and "subs", so "cc" is declared clobbered together
 * with memory and the scratch registers r4, r5 and r12.
 */
void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* writable copy: the asm doubles it to step two rows at a time */
    int stride = line_size;
    // [wr0 wr1] / [wr2 wr3]: even / odd source row at the original alignment
    // [wr4 wr5] / [wr6 wr7]: even / odd source row shifted by one more byte
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version (wr15 is reloaded from memory below before any use)
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        /* wcgr1 = pixels & 7, wcgr2 = (pixels & 7) + 1; r12 keeps the latter */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "add r4, %[pixels], %[line_size]\n\t"
        "tmcr wcgr2, r12 \n\t"
        "add r5, %[block], %[line_size] \n\t"
        "mov %[line_size], %[line_size], lsl #1 \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "1: \n\t"
        "wldrd wr10, [%[pixels]] \n\t"
        /* redone every iteration because the subs below overwrites the flags */
        "cmp r12, #8 \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "wldrd wr13, [r4] \n\t"
        "pld [%[pixels]] \n\t"
        "wldrd wr14, [r4, #8] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wldrd wr15, [r4, #16] \n\t"
        "add r4, r4, %[line_size] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "pld [r4] \n\t"
        "pld [r4, #32] \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        "walignr1 wr2, wr13, wr14 \n\t"
        "walignr1 wr3, wr14, wr15 \n\t"
        /* offset + 1 == 8: the shifted data is exactly the following loaded word */
        "wmoveq wr4, wr11 \n\t"
        "wmoveq wr5, wr12 \n\t"
        "wmoveq wr6, wr14 \n\t"
        "wmoveq wr7, wr15 \n\t"
        "walignr2ne wr4, wr10, wr11 \n\t"
        "walignr2ne wr5, wr11, wr12 \n\t"
        "walignr2ne wr6, wr13, wr14 \n\t"
        "walignr2ne wr7, wr14, wr15 \n\t"
        /* wr10..wr13 are reused for the current destination contents */
        "wldrd wr10, [%[block]] \n\t"
        WAVG2B" wr0, wr0, wr4 \n\t"
        "wldrd wr11, [%[block], #8] \n\t"
        WAVG2B" wr1, wr1, wr5 \n\t"
        "wldrd wr12, [r5] \n\t"
        WAVG2B" wr2, wr2, wr6 \n\t"
        "wldrd wr13, [r5, #8] \n\t"
        WAVG2B" wr3, wr3, wr7 \n\t"
        WAVG2B" wr0, wr0, wr10 \n\t"
        WAVG2B" wr1, wr1, wr11 \n\t"
        WAVG2B" wr2, wr2, wr12 \n\t"
        WAVG2B" wr3, wr3, wr13 \n\t"
        "wstrd wr0, [%[block]] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "wstrd wr1, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wstrd wr2, [r5] \n\t"
        "pld [%[block]] \n\t"
        "wstrd wr3, [r5, #8] \n\t"
        "add r5, r5, %[line_size] \n\t"
        "pld [%[block], #32] \n\t"
        "pld [r5] \n\t"
        "pld [r5, #32] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Vertical half-pel avg, 8 bytes per row: each source row is combined with
 * the next source row using WAVG2B, and that result is combined with the
 * existing block row using WAVG2B again before being stored.
 *
 * block:     destination; read and written 8 bytes per row
 * pixels:    source, any byte alignment (realigned through wcgr1);
 *            one row is read before the loop and two more per iteration
 * line_size: byte distance between rows for both buffers
 * h:         row count; decremented by 2 per iteration and the loop ends
 *            only at exactly zero, so h must be positive and even
 */
void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
/* local copy bound as a read-write asm operand; this asm never modifies it */
int stride = line_size;
// wr0: one source row, wr4: the other source row (they alternate as "previous"/"current")
// wr10/wr11: scratch for raw loads, wr10 is also reused for the destination row
__asm__ volatile(
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
/* wcgr1 = pixels & 7, then round pixels down to 8 bytes */
"and r12, %[pixels], #7 \n\t"
"tmcr wcgr1, r12 \n\t"
"bic %[pixels], %[pixels], #7 \n\t"
/* first source row -> wr0 */
"wldrd wr10, [%[pixels]] \n\t"
"wldrd wr11, [%[pixels], #8] \n\t"
"pld [%[block]] \n\t"
"add %[pixels], %[pixels], %[line_size] \n\t"
"walignr1 wr0, wr10, wr11 \n\t"
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"1: \n\t"
/* next source row -> wr4; output = WAVG2B(WAVG2B(wr0, wr4), dest) */
"wldrd wr10, [%[pixels]] \n\t"
"wldrd wr11, [%[pixels], #8] \n\t"
"add %[pixels], %[pixels], %[line_size] \n\t"
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"walignr1 wr4, wr10, wr11 \n\t"
"wldrd wr10, [%[block]] \n\t"
WAVG2B" wr8, wr0, wr4 \n\t"
WAVG2B" wr8, wr8, wr10 \n\t"
"wstrd wr8, [%[block]] \n\t"
"add %[block], %[block], %[line_size] \n\t"
/* following source row -> wr0; output = WAVG2B(WAVG2B(wr0, wr4), dest) */
"wldrd wr10, [%[pixels]] \n\t"
"wldrd wr11, [%[pixels], #8] \n\t"
"pld [%[block]] \n\t"
"add %[pixels], %[pixels], %[line_size] \n\t"
"pld [%[pixels]] \n\t"
"pld [%[pixels], #32] \n\t"
"walignr1 wr0, wr10, wr11 \n\t"
"wldrd wr10, [%[block]] \n\t"
WAVG2B" wr8, wr0, wr4 \n\t"
WAVG2B" wr8, wr8, wr10 \n\t"
"wstrd wr8, [%[block]] \n\t"
"add %[block], %[block], %[line_size] \n\t"
"subs %[h], %[h], #2 \n\t"
"pld [%[block]] \n\t"
"bne 1b \n\t"
: [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
:
: "cc", "memory", "r12");
}
/*
 * Vertical half-pel put, 16 bytes per row: each stored row is
 * WAVG2B(source row, next source row).
 *
 * block:     destination; written as two 8-byte stores per row
 * pixels:    source, any byte alignment (three 8-byte loads per row,
 *            realigned through wcgr1); one row is read before the loop and
 *            two more per iteration
 * line_size: byte distance between rows for both buffers
 * h:         row count; decremented by 2 per iteration and the loop ends
 *            only at exactly zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered. r4 and r5 remain
 * in the clobber list as in the original although this asm does not
 * reference them.
 */
void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* local copy bound as a read-write asm operand; this asm never modifies it */
    int stride = line_size;
    // [wr0 wr1]: one source row, [wr4 wr5]: the other source row
    // (they alternate as "previous"/"current"); wr10..wr12: raw loads
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        /* wcgr1 = pixels & 7, then round pixels down to 8 bytes */
        "and r12, %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        /* first source row -> [wr0 wr1] */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        "1: \n\t"
        /* next source row -> [wr4 wr5], store WAVG2B of the two rows */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr4, wr10, wr11 \n\t"
        "walignr1 wr5, wr11, wr12 \n\t"
        WAVG2B" wr8, wr0, wr4 \n\t"
        WAVG2B" wr9, wr1, wr5 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        /* following source row -> [wr0 wr1], store WAVG2B of the two rows */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        WAVG2B" wr8, wr0, wr4 \n\t"
        WAVG2B" wr9, wr1, wr5 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Vertical half-pel avg, 16 bytes per row: each source row is combined with
 * the next source row using WAVG2B, and that result is combined with the
 * existing block row using WAVG2B again before being stored.
 *
 * block:     destination; read and written 16 bytes per row
 * pixels:    source, any byte alignment (three 8-byte loads per row,
 *            realigned through wcgr1); one row is read before the loop and
 *            two more per iteration
 * line_size: byte distance between rows for both buffers
 * h:         row count; decremented by 2 per iteration and the loop ends
 *            only at exactly zero, so h must be positive and even
 *
 * The asm executes "subs", so "cc" is declared clobbered. r4 and r5 remain
 * in the clobber list as in the original although this asm does not
 * reference them.
 */
void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    /* local copy bound as a read-write asm operand; this asm never modifies it */
    int stride = line_size;
    // [wr0 wr1]: one source row, [wr4 wr5]: the other source row
    // (they alternate as "previous"/"current");
    // wr10..wr12: raw loads, wr10/wr11 are also reused for the destination row
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        /* wcgr1 = pixels & 7, then round pixels down to 8 bytes */
        "and r12, %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        /* first source row -> [wr0 wr1] */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "pld [%[block]] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        "1: \n\t"
        /* next source row -> [wr4 wr5]; output = WAVG2B(WAVG2B(rows), dest) */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr4, wr10, wr11 \n\t"
        "walignr1 wr5, wr11, wr12 \n\t"
        "wldrd wr10, [%[block]] \n\t"
        "wldrd wr11, [%[block], #8] \n\t"
        WAVG2B" wr8, wr0, wr4 \n\t"
        WAVG2B" wr9, wr1, wr5 \n\t"
        WAVG2B" wr8, wr8, wr10 \n\t"
        WAVG2B" wr9, wr9, wr11 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        /* following source row -> [wr0 wr1]; output = WAVG2B(WAVG2B(rows), dest) */
        "wldrd wr10, [%[pixels]] \n\t"
        "wldrd wr11, [%[pixels], #8] \n\t"
        "pld [%[block]] \n\t"
        "wldrd wr12, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr0, wr10, wr11 \n\t"
        "walignr1 wr1, wr11, wr12 \n\t"
        "wldrd wr10, [%[block]] \n\t"
        "wldrd wr11, [%[block], #8] \n\t"
        WAVG2B" wr8, wr0, wr4 \n\t"
        WAVG2B" wr9, wr1, wr5 \n\t"
        WAVG2B" wr8, wr8, wr10 \n\t"
        WAVG2B" wr9, wr9, wr11 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "pld [%[block]] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
        :
        : "cc", "r4", "r5", "r12", "memory");
}
/*
 * Two-dimensional half-pel put, 8 bytes per row.
 *
 * For every source row the bytes at offset 0 and offset +1 are widened to
 * halfwords and summed. Each output row is then
 *   (sum of this row + sum of the next row + wr15) >> 2
 * packed back to bytes, where wr15 is the constant loaded by SET_RND and the
 * shift count 2 is held in wcgr0.
 *
 * block:     destination; written 8 bytes per row
 * pixels:    source, any byte alignment; 16 bytes are fetched per row from
 *            the address rounded down to 8; one row is read before the loop
 *            and two more per iteration
 * line_size: byte distance between rows for both buffers
 * h:         row count; decremented by 2 per iteration and the loop ends
 *            only at exactly zero, so h must be positive and even
 *
 * The asm executes "cmp" and "subs", so "cc" is declared clobbered together
 * with memory and the scratch register r12.
 */
void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    // [wr0 wr1 wr2 wr3] for previous line
    // [wr4 wr5 wr6 wr7] for current line
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "mov r12, #2 \n\t"
        "pld [%[pixels], #32] \n\t"
        "tmcr wcgr0, r12 \n\t" /* for shift value */
        /* wcgr1 = pixels & 7, wcgr2 = (pixels & 7) + 1; r12 keeps the latter */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        "wldrd wr12, [%[pixels]] \n\t"
        "add r12, r12, #1 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "tmcr wcgr2, r12 \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "cmp r12, #8 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        /* offset + 1 == 8: the shifted row is exactly the second loaded word */
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        /* widen both copies to halfwords and add: [wr0 wr1] = horizontal sums */
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "1: \n\t"
        // [wr0 wr1 wr2 wr3]
        // [wr4 wr5 wr6 wr7] <= *
        "wldrd wr12, [%[pixels]] \n\t"
        /* redone every iteration because the subs below overwrites the flags */
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr6, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "wunpckelub wr4, wr6 \n\t"
        "wunpckehub wr5, wr6 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr4, wr4, wr8 \n\t"
        "waddhus wr5, wr5, wr9 \n\t"
        /* vertical sum of the two rows, add wr15, shift by wcgr0, pack, store */
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        "wldrd wr12, [%[pixels]] \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        /* no cmp is repeated here: eq/ne still come from the cmp at the top of the loop body */
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "subs %[h], %[h], #2 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
        : [line_size]"r"(line_size)
        : "cc", "r12", "memory");
}
/**
 * 16-pixel-wide "xy2" put.
 *
 * For every output row, each source pixel is summed with its right-hand
 * neighbour on two consecutive source rows, the rounding constant held in
 * wr15 is added, the sum is shifted right by 2 (wcgr0) and the 16 packed
 * bytes are stored to block.
 *
 * pixels is rounded down to an 8-byte boundary; the original byte offset is
 * kept in wcgr1 and offset + 1 in wcgr2 so that walignr1/walignr2 can extract
 * the row and the row shifted by one pixel from the aligned loads.  When
 * offset + 1 == 8 the shifted row is simply the following doubleword, which
 * is what the wmoveq/walignr2ne pairs select on.
 *
 * Two rows are produced per loop iteration and the loop only terminates when
 * h reaches exactly zero after "subs h, h, #2", so h is expected to be a
 * positive even number.
 *
 * @param block     destination, advanced by line_size per row
 * @param pixels    source, advanced by line_size per row (h + 1 rows are read)
 * @param line_size stride used for both source and destination
 * @param h         number of rows to produce
 */
void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    // [wr0 wr1 wr2 wr3] for previous line
    // [wr4 wr5 wr6 wr7] for current line
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
    __asm__ volatile(
        "pld [%[pixels]] \n\t"
        "mov r12, #2 \n\t"
        "pld [%[pixels], #32] \n\t"
        "tmcr wcgr0, r12 \n\t" /* for shift value */
        /* alignment */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "tmcr wcgr2, r12 \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        "wldrd wr12, [%[pixels]] \n\t"
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr3, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr2, wr3 \n\t"
        "wunpckehub wr3, wr3 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr2, wr2, wr10 \n\t"
        "waddhus wr3, wr3, wr11 \n\t"

        "1: \n\t"
        // [wr0 wr1 wr2 wr3]
        // [wr4 wr5 wr6 wr7] <= *
        "wldrd wr12, [%[pixels]] \n\t"
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr6, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr7, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr4, wr6 \n\t"
        "wunpckehub wr5, wr6 \n\t"
        "wunpckelub wr6, wr7 \n\t"
        "wunpckehub wr7, wr7 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr4, wr4, wr8 \n\t"
        "waddhus wr5, wr5, wr9 \n\t"
        "waddhus wr6, wr6, wr10 \n\t"
        "waddhus wr7, wr7, wr11 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr10, wr2, wr6 \n\t"
        "waddhus wr11, wr3, wr7 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "waddhus wr10, wr10, wr15 \n\t"
        "waddhus wr11, wr11, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wsrlhg wr10, wr10, wcgr0 \n\t"
        "wsrlhg wr11, wr11, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "wpackhus wr9, wr10, wr11 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        /* the flags from the "cmp r12, #8" above are still valid here */
        "wldrd wr12, [%[pixels]] \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr3, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr2, wr3 \n\t"
        "wunpckehub wr3, wr3 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr2, wr2, wr10 \n\t"
        "waddhus wr3, wr3, wr11 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr10, wr2, wr6 \n\t"
        "waddhus wr11, wr3, wr7 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "waddhus wr10, wr10, wr15 \n\t"
        "waddhus wr11, wr11, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wsrlhg wr10, wr10, wcgr0 \n\t"
        "wsrlhg wr11, wr11, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "wpackhus wr9, wr10, wr11 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"

        "subs %[h], %[h], #2 \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
        : [line_size]"r"(line_size)
        /* cmp/subs modify the condition flags, so "cc" must be listed */
        : "r12", "cc", "memory");
}
/**
 * 8-pixel-wide "xy2" average.
 *
 * For every output row, each source pixel is summed with its right-hand
 * neighbour on two consecutive source rows, the rounding constant held in
 * wr15 is added and the sum is shifted right by 2 (wcgr0).  The packed
 * result is then combined with the 8 bytes already present in block through
 * the WAVG2B instruction macro (defined outside this function) and written
 * back.
 *
 * pixels is rounded down to an 8-byte boundary; the original byte offset is
 * kept in wcgr1 and offset + 1 in wcgr2 for walignr1/walignr2.  When
 * offset + 1 == 8 the one-pixel-shifted row is simply the second loaded
 * doubleword, which is what the wmoveq/walignr2ne pairs select on.
 *
 * Two rows are produced per loop iteration and the loop only terminates when
 * h reaches exactly zero after "subs h, h, #2", so h is expected to be a
 * positive even number.
 *
 * @param block     destination (read and written), advanced by line_size per row
 * @param pixels    source, advanced by line_size per row (h + 1 rows are read)
 * @param line_size stride used for both source and destination
 * @param h         number of rows to produce
 */
void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    // [wr0 wr1 wr2 wr3] for previous line
    // [wr4 wr5 wr6 wr7] for current line
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
    __asm__ volatile(
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "pld [%[pixels]] \n\t"
        "mov r12, #2 \n\t"
        "pld [%[pixels], #32] \n\t"
        "tmcr wcgr0, r12 \n\t" /* for shift value */
        /* alignment */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        "wldrd wr12, [%[pixels]] \n\t"
        "add r12, r12, #1 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "tmcr wcgr2, r12 \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "cmp r12, #8 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"

        "1: \n\t"
        // [wr0 wr1 wr2 wr3]
        // [wr4 wr5 wr6 wr7] <= *
        "wldrd wr12, [%[pixels]] \n\t"
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr6, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "wunpckelub wr4, wr6 \n\t"
        "wunpckehub wr5, wr6 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr4, wr4, wr8 \n\t"
        "waddhus wr5, wr5, wr9 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "wldrd wr12, [%[block]] \n\t" /* current destination contents */
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        WAVG2B" wr8, wr8, wr12 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "wldrd wr12, [%[pixels]] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        /* the flags from the "cmp r12, #8" above are still valid here */
        "wldrd wr13, [%[pixels], #8] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "wmoveq wr10, wr13 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "wldrd wr12, [%[block]] \n\t" /* current destination contents */
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "subs %[h], %[h], #2 \n\t"
        WAVG2B" wr8, wr8, wr12 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
        : [line_size]"r"(line_size)
        /* cmp/subs modify the condition flags, so "cc" must be listed */
        : "r12", "cc", "memory");
}
/**
 * 16-pixel-wide "xy2" average.
 *
 * For every output row, each source pixel is summed with its right-hand
 * neighbour on two consecutive source rows, the rounding constant held in
 * wr15 is added and the sum is shifted right by 2 (wcgr0).  The two packed
 * doublewords are then combined with the 16 bytes already present in block
 * through the WAVG2B instruction macro (defined outside this function) and
 * written back.
 *
 * pixels is rounded down to an 8-byte boundary; the original byte offset is
 * kept in wcgr1 and offset + 1 in wcgr2 for walignr1/walignr2.  When
 * offset + 1 == 8 the one-pixel-shifted row is simply the following
 * doubleword, which is what the wmoveq/walignr2ne pairs select on.
 *
 * Two rows are produced per loop iteration and the loop only terminates when
 * h reaches exactly zero after "subs h, h, #2", so h is expected to be a
 * positive even number.
 *
 * @param block     destination (read and written), advanced by line_size per row
 * @param pixels    source, advanced by line_size per row (h + 1 rows are read)
 * @param line_size stride used for both source and destination
 * @param h         number of rows to produce
 */
void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    // [wr0 wr1 wr2 wr3] for previous line
    // [wr4 wr5 wr6 wr7] for current line
    SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
    __asm__ volatile(
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "pld [%[pixels]] \n\t"
        "mov r12, #2 \n\t"
        "pld [%[pixels], #32] \n\t"
        "tmcr wcgr0, r12 \n\t" /* for shift value */
        /* alignment */
        "and r12, %[pixels], #7 \n\t"
        "bic %[pixels], %[pixels], #7 \n\t"
        "tmcr wcgr1, r12 \n\t"
        "add r12, r12, #1 \n\t"
        "tmcr wcgr2, r12 \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        "wldrd wr12, [%[pixels]] \n\t"
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "pld [%[pixels]] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr3, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr2, wr3 \n\t"
        "wunpckehub wr3, wr3 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr2, wr2, wr10 \n\t"
        "waddhus wr3, wr3, wr11 \n\t"

        "1: \n\t"
        // [wr0 wr1 wr2 wr3]
        // [wr4 wr5 wr6 wr7] <= *
        "wldrd wr12, [%[pixels]] \n\t"
        "cmp r12, #8 \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr6, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr7, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr4, wr6 \n\t"
        "wunpckehub wr5, wr6 \n\t"
        "wunpckelub wr6, wr7 \n\t"
        "wunpckehub wr7, wr7 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr4, wr4, wr8 \n\t"
        "waddhus wr5, wr5, wr9 \n\t"
        "waddhus wr6, wr6, wr10 \n\t"
        "waddhus wr7, wr7, wr11 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr10, wr2, wr6 \n\t"
        "waddhus wr11, wr3, wr7 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "waddhus wr10, wr10, wr15 \n\t"
        "waddhus wr11, wr11, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wldrd wr12, [%[block]] \n\t" /* current destination contents */
        "wldrd wr13, [%[block], #8] \n\t"
        "wsrlhg wr10, wr10, wcgr0 \n\t"
        "wsrlhg wr11, wr11, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "wpackhus wr9, wr10, wr11 \n\t"
        WAVG2B" wr8, wr8, wr12 \n\t"
        WAVG2B" wr9, wr9, wr13 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"

        // [wr0 wr1 wr2 wr3] <= *
        // [wr4 wr5 wr6 wr7]
        /* the flags from the "cmp r12, #8" above are still valid here */
        "wldrd wr12, [%[pixels]] \n\t"
        "pld [%[block]] \n\t"
        "wldrd wr13, [%[pixels], #8] \n\t"
        "pld [%[block], #32] \n\t"
        "wldrd wr14, [%[pixels], #16] \n\t"
        "add %[pixels], %[pixels], %[line_size] \n\t"
        "walignr1 wr2, wr12, wr13 \n\t"
        "pld [%[pixels]] \n\t"
        "pld [%[pixels], #32] \n\t"
        "walignr1 wr3, wr13, wr14 \n\t"
        "wmoveq wr10, wr13 \n\t"
        "wmoveq wr11, wr14 \n\t"
        "walignr2ne wr10, wr12, wr13 \n\t"
        "walignr2ne wr11, wr13, wr14 \n\t"
        "wunpckelub wr0, wr2 \n\t"
        "wunpckehub wr1, wr2 \n\t"
        "wunpckelub wr2, wr3 \n\t"
        "wunpckehub wr3, wr3 \n\t"
        "wunpckelub wr8, wr10 \n\t"
        "wunpckehub wr9, wr10 \n\t"
        "wunpckelub wr10, wr11 \n\t"
        "wunpckehub wr11, wr11 \n\t"
        "waddhus wr0, wr0, wr8 \n\t"
        "waddhus wr1, wr1, wr9 \n\t"
        "waddhus wr2, wr2, wr10 \n\t"
        "waddhus wr3, wr3, wr11 \n\t"
        "waddhus wr8, wr0, wr4 \n\t"
        "waddhus wr9, wr1, wr5 \n\t"
        "waddhus wr10, wr2, wr6 \n\t"
        "waddhus wr11, wr3, wr7 \n\t"
        "waddhus wr8, wr8, wr15 \n\t"
        "waddhus wr9, wr9, wr15 \n\t"
        "waddhus wr10, wr10, wr15 \n\t"
        "waddhus wr11, wr11, wr15 \n\t"
        "wsrlhg wr8, wr8, wcgr0 \n\t"
        "wsrlhg wr9, wr9, wcgr0 \n\t"
        "wldrd wr12, [%[block]] \n\t" /* current destination contents */
        "wldrd wr13, [%[block], #8] \n\t"
        "wsrlhg wr10, wr10, wcgr0 \n\t"
        "wsrlhg wr11, wr11, wcgr0 \n\t"
        "wpackhus wr8, wr8, wr9 \n\t"
        "wpackhus wr9, wr10, wr11 \n\t"
        WAVG2B" wr8, wr8, wr12 \n\t"
        WAVG2B" wr9, wr9, wr13 \n\t"
        "wstrd wr8, [%[block]] \n\t"
        "wstrd wr9, [%[block], #8] \n\t"
        "add %[block], %[block], %[line_size] \n\t"
        "subs %[h], %[h], #2 \n\t"
        "pld [%[block]] \n\t"
        "pld [%[block], #32] \n\t"
        "bne 1b \n\t"
        : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
        : [line_size]"r"(line_size)
        /* cmp/subs modify the condition flags, so "cc" must be listed */
        : "r12", "cc", "memory");
}
...@@ -40,16 +40,9 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block, ...@@ -40,16 +40,9 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block,
void ff_MPV_common_init_arm(MpegEncContext *s) void ff_MPV_common_init_arm(MpegEncContext *s)
{ {
/* IWMMXT support is a superset of armv5te, so
* allow optimized functions for armv5te unless
* a better iwmmxt function exists
*/
#if HAVE_ARMV5TE #if HAVE_ARMV5TE
ff_MPV_common_init_armv5te(s); ff_MPV_common_init_armv5te(s);
#endif #endif
#if HAVE_IWMMXT
ff_MPV_common_init_iwmmxt(s);
#endif
if (HAVE_NEON) { if (HAVE_NEON) {
s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon; s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon;
......
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
#include "libavcodec/mpegvideo.h" #include "libavcodec/mpegvideo.h"
void ff_MPV_common_init_iwmmxt(MpegEncContext *s);
void ff_MPV_common_init_armv5te(MpegEncContext *s); void ff_MPV_common_init_armv5te(MpegEncContext *s);
#endif /* AVCODEC_ARM_MPEGVIDEO_H */ #endif /* AVCODEC_ARM_MPEGVIDEO_H */
/*
* copyright (c) 2004 AGAWA Koji
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
#include "mpegvideo_arm.h"
/**
 * H.263 intra dequantisation using iWMMXt instructions.
 *
 * Every non-zero coefficient c in the processed range becomes
 * c * qmul + qadd for positive c and c * qmul - qadd for negative c, where
 * qmul = 2 * qscale; zero coefficients stay zero.  The DC coefficient is
 * computed separately in C and written back after the vector loop.
 *
 * The loop handles 16 bytes of block per iteration and runs
 * (nCoeffs + 8) / 8 times, so it may touch coefficients past nCoeffs within
 * the last group.
 *
 * @param s      codec context; supplies the DC scales, h263_aic, ac_pred,
 *               block_last_index and the inter scan table
 * @param block  coefficient block, modified in place
 * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale quantiser scale
 */
static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s,
                                             DCTELEM *block, int n, int qscale)
{
    int level, qmul, qadd;
    int nCoeffs;
    int count;
    DCTELEM *block_orig = block;

    assert(s->block_last_index[n]>=0);

    qmul = qscale << 1;

    if (!s->h263_aic) {
        if (n < 4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd = (qscale - 1) | 1;
    }else{
        qadd = 0;
        level = block[0];
    }
    if(s->ac_pred)
        nCoeffs=63;
    else
        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

    /* The asm decrements its loop counter, so it has to live in a variable
     * bound as a read-write operand rather than in an input-only operand. */
    count = (nCoeffs + 8) / 8;

    __asm__ volatile (
/*      "movd %1, %%mm6                 \n\t" //qmul */
/*      "packssdw %%mm6, %%mm6          \n\t" */
/*      "packssdw %%mm6, %%mm6          \n\t" */
        "tbcsth wr6, %[qmul] \n\t"
/*      "movd %2, %%mm5                 \n\t" //qadd */
/*      "packssdw %%mm5, %%mm5          \n\t" */
/*      "packssdw %%mm5, %%mm5          \n\t" */
        "tbcsth wr5, %[qadd] \n\t"
        "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */
        "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */
        "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */
        "1: \n\t"
        "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */
        "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */
        "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */
        "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */
/*      "movq (%0, %3), %%mm2           \n\t" */
/*      "movq 8(%0, %3), %%mm3          \n\t" */
        "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */
        /* The sign mask of the second half must come from wr3 itself; wr2
         * already holds the mask of the first half at this point. */
        "wcmpgtsh wr3, wr4, wr3 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */
        "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */
        "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */
        "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */
        "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */
        "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */
        "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */
        "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */
        "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */
        "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */
        "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */
        "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */
        "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */
        "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */
        "subs %[i], %[i], #1 \n\t"
        "bne 1b \n\t" /* "jng 1b \n\t" */
        :[block]"+r"(block), [i]"+r"(count)
        :[qmul]"r"(qmul), [qadd]"r"(qadd)
        /* subs modifies the condition flags */
        :"cc", "memory");

    /* the vector loop also rescaled block[0]; restore the real DC value */
    block_orig[0] = level;
}
/**
 * Install the iWMMXt H.263 intra dequantiser into the context.
 *
 * The function pointer is only replaced when the AV_CPU_FLAG_IWMMXT bit is
 * set in mm_flags; otherwise s is left untouched.
 * NOTE(review): mm_flags is not declared in this file - confirm which header
 * provides it.
 */
void ff_MPV_common_init_iwmmxt(MpegEncContext *s)
{
    if (mm_flags & AV_CPU_FLAG_IWMMXT)
        s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt;
}
...@@ -125,8 +125,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -125,8 +125,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
AVFrame * const newpic = &c93->pictures[c93->currentpic]; AVFrame * const newpic = &c93->pictures[c93->currentpic];
AVFrame * const oldpic = &c93->pictures[c93->currentpic^1]; AVFrame * const oldpic = &c93->pictures[c93->currentpic^1];
AVFrame *picture = data; AVFrame *picture = data;
GetByteContext gb;
uint8_t *out; uint8_t *out;
int stride, i, x, y, bt = 0; int stride, i, x, y, b, bt = 0;
c93->currentpic ^= 1; c93->currentpic ^= 1;
...@@ -140,7 +141,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -140,7 +141,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
stride = newpic->linesize[0]; stride = newpic->linesize[0];
if (buf[0] & C93_FIRST_FRAME) { bytestream2_init(&gb, buf, buf_size);
b = bytestream2_get_byte(&gb);
if (b & C93_FIRST_FRAME) {
newpic->pict_type = AV_PICTURE_TYPE_I; newpic->pict_type = AV_PICTURE_TYPE_I;
newpic->key_frame = 1; newpic->key_frame = 1;
} else { } else {
...@@ -148,17 +151,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -148,17 +151,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
newpic->key_frame = 0; newpic->key_frame = 0;
} }
if (*buf++ & C93_HAS_PALETTE) {
uint32_t *palette = (uint32_t *) newpic->data[1];
const uint8_t *palbuf = buf + buf_size - 768 - 1;
for (i = 0; i < 256; i++) {
palette[i] = 0xFF << 24 | bytestream_get_be24(&palbuf);
}
} else {
if (oldpic->data[1])
memcpy(newpic->data[1], oldpic->data[1], 256 * 4);
}
for (y = 0; y < HEIGHT; y += 8) { for (y = 0; y < HEIGHT; y += 8) {
out = newpic->data[0] + y * stride; out = newpic->data[0] + y * stride;
for (x = 0; x < WIDTH; x += 8) { for (x = 0; x < WIDTH; x += 8) {
...@@ -168,12 +160,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -168,12 +160,12 @@ static int decode_frame(AVCodecContext *avctx, void *data,
C93BlockType block_type; C93BlockType block_type;
if (!bt) if (!bt)
bt = *buf++; bt = bytestream2_get_byte(&gb);
block_type= bt & 0x0F; block_type= bt & 0x0F;
switch (block_type) { switch (block_type) {
case C93_8X8_FROM_PREV: case C93_8X8_FROM_PREV:
offset = bytestream_get_le16(&buf); offset = bytestream2_get_le16(&gb);
if (copy_block(avctx, out, copy_from, offset, 8, stride)) if (copy_block(avctx, out, copy_from, offset, 8, stride))
return -1; return -1;
break; break;
...@@ -183,7 +175,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -183,7 +175,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
case C93_4X4_FROM_PREV: case C93_4X4_FROM_PREV:
for (j = 0; j < 8; j += 4) { for (j = 0; j < 8; j += 4) {
for (i = 0; i < 8; i += 4) { for (i = 0; i < 8; i += 4) {
offset = bytestream_get_le16(&buf); offset = bytestream2_get_le16(&gb);
if (copy_block(avctx, &out[j*stride+i], if (copy_block(avctx, &out[j*stride+i],
copy_from, offset, 4, stride)) copy_from, offset, 4, stride))
return -1; return -1;
...@@ -192,10 +184,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -192,10 +184,10 @@ static int decode_frame(AVCodecContext *avctx, void *data,
break; break;
case C93_8X8_2COLOR: case C93_8X8_2COLOR:
bytestream_get_buffer(&buf, cols, 2); bytestream2_get_buffer(&gb, cols, 2);
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
draw_n_color(out + i*stride, stride, 8, 1, 1, cols, draw_n_color(out + i*stride, stride, 8, 1, 1, cols,
NULL, *buf++); NULL, bytestream2_get_byte(&gb));
} }
break; break;
...@@ -206,17 +198,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -206,17 +198,17 @@ static int decode_frame(AVCodecContext *avctx, void *data,
for (j = 0; j < 8; j += 4) { for (j = 0; j < 8; j += 4) {
for (i = 0; i < 8; i += 4) { for (i = 0; i < 8; i += 4) {
if (block_type == C93_4X4_2COLOR) { if (block_type == C93_4X4_2COLOR) {
bytestream_get_buffer(&buf, cols, 2); bytestream2_get_buffer(&gb, cols, 2);
draw_n_color(out + i + j*stride, stride, 4, 4, draw_n_color(out + i + j*stride, stride, 4, 4,
1, cols, NULL, bytestream_get_le16(&buf)); 1, cols, NULL, bytestream2_get_le16(&gb));
} else if (block_type == C93_4X4_4COLOR) { } else if (block_type == C93_4X4_4COLOR) {
bytestream_get_buffer(&buf, cols, 4); bytestream2_get_buffer(&gb, cols, 4);
draw_n_color(out + i + j*stride, stride, 4, 4, draw_n_color(out + i + j*stride, stride, 4, 4,
2, cols, NULL, bytestream_get_le32(&buf)); 2, cols, NULL, bytestream2_get_le32(&gb));
} else { } else {
bytestream_get_buffer(&buf, grps, 4); bytestream2_get_buffer(&gb, grps, 4);
draw_n_color(out + i + j*stride, stride, 4, 4, draw_n_color(out + i + j*stride, stride, 4, 4,
1, cols, grps, bytestream_get_le16(&buf)); 1, cols, grps, bytestream2_get_le16(&gb));
} }
} }
} }
...@@ -227,7 +219,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -227,7 +219,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
case C93_8X8_INTRA: case C93_8X8_INTRA:
for (j = 0; j < 8; j++) for (j = 0; j < 8; j++)
bytestream_get_buffer(&buf, out + j*stride, 8); bytestream2_get_buffer(&gb, out + j*stride, 8);
break; break;
default: default:
...@@ -240,6 +232,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, ...@@ -240,6 +232,16 @@ static int decode_frame(AVCodecContext *avctx, void *data,
} }
} }
if (b & C93_HAS_PALETTE) {
uint32_t *palette = (uint32_t *) newpic->data[1];
for (i = 0; i < 256; i++) {
palette[i] = 0xFFU << 24 | bytestream2_get_be24(&gb);
}
} else {
if (oldpic->data[1])
memcpy(newpic->data[1], oldpic->data[1], 256 * 4);
}
*picture = *newpic; *picture = *newpic;
*data_size = sizeof(AVFrame); *data_size = sizeof(AVFrame);
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include "avcodec.h" #include "avcodec.h"
#include "bytestream.h" #include "bytestream.h"
#include "libavutil/intreadwrite.h"
static av_cold int decode_init(AVCodecContext *avctx) static av_cold int decode_init(AVCodecContext *avctx)
{ {
...@@ -54,7 +53,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -54,7 +53,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
av_log(avctx, AV_LOG_ERROR, "Packet is too small.\n"); av_log(avctx, AV_LOG_ERROR, "Packet is too small.\n");
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
if (bytestream_get_le32(&buf) != AV_RL32("FRW1")) { if (bytestream_get_le32(&buf) != MKTAG('F', 'R', 'W', '1')) {
av_log(avctx, AV_LOG_ERROR, "incorrect marker\n"); av_log(avctx, AV_LOG_ERROR, "incorrect marker\n");
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
......
...@@ -306,24 +306,26 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, ...@@ -306,24 +306,26 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
const uint8_t *buf = avpkt->data; const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
MimicContext *ctx = avctx->priv_data; MimicContext *ctx = avctx->priv_data;
GetByteContext gb;
int is_pframe; int is_pframe;
int width, height; int width, height;
int quality, num_coeffs; int quality, num_coeffs;
int swap_buf_size = buf_size - MIMIC_HEADER_SIZE; int swap_buf_size = buf_size - MIMIC_HEADER_SIZE;
if(buf_size < MIMIC_HEADER_SIZE) { if (buf_size <= MIMIC_HEADER_SIZE) {
av_log(avctx, AV_LOG_ERROR, "insufficient data\n"); av_log(avctx, AV_LOG_ERROR, "insufficient data\n");
return -1; return -1;
} }
buf += 2; /* some constant (always 256) */ bytestream2_init(&gb, buf, MIMIC_HEADER_SIZE);
quality = bytestream_get_le16(&buf); bytestream2_skip(&gb, 2); /* some constant (always 256) */
width = bytestream_get_le16(&buf); quality = bytestream2_get_le16u(&gb);
height = bytestream_get_le16(&buf); width = bytestream2_get_le16u(&gb);
buf += 4; /* some constant */ height = bytestream2_get_le16u(&gb);
is_pframe = bytestream_get_le32(&buf); bytestream2_skip(&gb, 4); /* some constant */
num_coeffs = bytestream_get_byte(&buf); is_pframe = bytestream2_get_le32u(&gb);
buf += 3; /* some constant */ num_coeffs = bytestream2_get_byteu(&gb);
bytestream2_skip(&gb, 3); /* some constant */
if(!ctx->avctx) { if(!ctx->avctx) {
int i; int i;
...@@ -372,7 +374,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, ...@@ -372,7 +374,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
ctx->dsp.bswap_buf(ctx->swap_buf, ctx->dsp.bswap_buf(ctx->swap_buf,
(const uint32_t*) buf, (const uint32_t*) (buf + MIMIC_HEADER_SIZE),
swap_buf_size>>2); swap_buf_size>>2);
init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3); init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3);
......
...@@ -141,6 +141,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -141,6 +141,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
const uint8_t *src = avpkt->data; const uint8_t *src = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
PCMBRDecode *s = avctx->priv_data; PCMBRDecode *s = avctx->priv_data;
GetByteContext gb;
int num_source_channels, channel, retval; int num_source_channels, channel, retval;
int sample_size, samples; int sample_size, samples;
int16_t *dst16; int16_t *dst16;
...@@ -156,6 +157,8 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -156,6 +157,8 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
src += 4; src += 4;
buf_size -= 4; buf_size -= 4;
bytestream2_init(&gb, src, buf_size);
/* There's always an even number of channels in the source */ /* There's always an even number of channels in the source */
num_source_channels = FFALIGN(avctx->channels, 2); num_source_channels = FFALIGN(avctx->channels, 2);
sample_size = (num_source_channels * (avctx->sample_fmt == AV_SAMPLE_FMT_S16 ? 16 : 24)) >> 3; sample_size = (num_source_channels * (avctx->sample_fmt == AV_SAMPLE_FMT_S16 ? 16 : 24)) >> 3;
...@@ -179,15 +182,15 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -179,15 +182,15 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
samples *= num_source_channels; samples *= num_source_channels;
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
memcpy(dst16, src, buf_size); bytestream2_get_buffer(&gb, dst16, buf_size);
#else #else
do { do {
*dst16++ = bytestream_get_be16(&src); *dst16++ = bytestream2_get_be16u(&gb);
} while (--samples); } while (--samples);
#endif #endif
} else { } else {
do { do {
*dst32++ = bytestream_get_be24(&src) << 8; *dst32++ = bytestream2_get_be24u(&gb) << 8;
} while (--samples); } while (--samples);
} }
break; break;
...@@ -199,24 +202,23 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -199,24 +202,23 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do { do {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
memcpy(dst16, src, avctx->channels * 2); bytestream2_get_buffer(&gb, dst16, avctx->channels * 2);
dst16 += avctx->channels; dst16 += avctx->channels;
src += sample_size;
#else #else
channel = avctx->channels; channel = avctx->channels;
do { do {
*dst16++ = bytestream_get_be16(&src); *dst16++ = bytestream2_get_be16u(&gb);
} while (--channel); } while (--channel);
src += 2;
#endif #endif
bytestream2_skip(&gb, 2);
} while (--samples); } while (--samples);
} else { } else {
do { do {
channel = avctx->channels; channel = avctx->channels;
do { do {
*dst32++ = bytestream_get_be24(&src) << 8; *dst32++ = bytestream2_get_be24u(&gb) << 8;
} while (--channel); } while (--channel);
src += 3; bytestream2_skip(&gb, 3);
} while (--samples); } while (--samples);
} }
break; break;
...@@ -224,22 +226,22 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -224,22 +226,22 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_5POINT1: case AV_CH_LAYOUT_5POINT1:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do { do {
dst16[0] = bytestream_get_be16(&src); dst16[0] = bytestream2_get_be16u(&gb);
dst16[1] = bytestream_get_be16(&src); dst16[1] = bytestream2_get_be16u(&gb);
dst16[2] = bytestream_get_be16(&src); dst16[2] = bytestream2_get_be16u(&gb);
dst16[4] = bytestream_get_be16(&src); dst16[4] = bytestream2_get_be16u(&gb);
dst16[5] = bytestream_get_be16(&src); dst16[5] = bytestream2_get_be16u(&gb);
dst16[3] = bytestream_get_be16(&src); dst16[3] = bytestream2_get_be16u(&gb);
dst16 += 6; dst16 += 6;
} while (--samples); } while (--samples);
} else { } else {
do { do {
dst32[0] = bytestream_get_be24(&src) << 8; dst32[0] = bytestream2_get_be24u(&gb) << 8;
dst32[1] = bytestream_get_be24(&src) << 8; dst32[1] = bytestream2_get_be24u(&gb) << 8;
dst32[2] = bytestream_get_be24(&src) << 8; dst32[2] = bytestream2_get_be24u(&gb) << 8;
dst32[4] = bytestream_get_be24(&src) << 8; dst32[4] = bytestream2_get_be24u(&gb) << 8;
dst32[5] = bytestream_get_be24(&src) << 8; dst32[5] = bytestream2_get_be24u(&gb) << 8;
dst32[3] = bytestream_get_be24(&src) << 8; dst32[3] = bytestream2_get_be24u(&gb) << 8;
dst32 += 6; dst32 += 6;
} while (--samples); } while (--samples);
} }
...@@ -248,27 +250,27 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -248,27 +250,27 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_7POINT0: case AV_CH_LAYOUT_7POINT0:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do { do {
dst16[0] = bytestream_get_be16(&src); dst16[0] = bytestream2_get_be16u(&gb);
dst16[1] = bytestream_get_be16(&src); dst16[1] = bytestream2_get_be16u(&gb);
dst16[2] = bytestream_get_be16(&src); dst16[2] = bytestream2_get_be16u(&gb);
dst16[5] = bytestream_get_be16(&src); dst16[5] = bytestream2_get_be16u(&gb);
dst16[3] = bytestream_get_be16(&src); dst16[3] = bytestream2_get_be16u(&gb);
dst16[4] = bytestream_get_be16(&src); dst16[4] = bytestream2_get_be16u(&gb);
dst16[6] = bytestream_get_be16(&src); dst16[6] = bytestream2_get_be16u(&gb);
dst16 += 7; dst16 += 7;
src += 2; bytestream2_skip(&gb, 2);
} while (--samples); } while (--samples);
} else { } else {
do { do {
dst32[0] = bytestream_get_be24(&src) << 8; dst32[0] = bytestream2_get_be24u(&gb) << 8;
dst32[1] = bytestream_get_be24(&src) << 8; dst32[1] = bytestream2_get_be24u(&gb) << 8;
dst32[2] = bytestream_get_be24(&src) << 8; dst32[2] = bytestream2_get_be24u(&gb) << 8;
dst32[5] = bytestream_get_be24(&src) << 8; dst32[5] = bytestream2_get_be24u(&gb) << 8;
dst32[3] = bytestream_get_be24(&src) << 8; dst32[3] = bytestream2_get_be24u(&gb) << 8;
dst32[4] = bytestream_get_be24(&src) << 8; dst32[4] = bytestream2_get_be24u(&gb) << 8;
dst32[6] = bytestream_get_be24(&src) << 8; dst32[6] = bytestream2_get_be24u(&gb) << 8;
dst32 += 7; dst32 += 7;
src += 3; bytestream2_skip(&gb, 3);
} while (--samples); } while (--samples);
} }
break; break;
...@@ -276,26 +278,26 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -276,26 +278,26 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
case AV_CH_LAYOUT_7POINT1: case AV_CH_LAYOUT_7POINT1:
if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) { if (AV_SAMPLE_FMT_S16 == avctx->sample_fmt) {
do { do {
dst16[0] = bytestream_get_be16(&src); dst16[0] = bytestream2_get_be16u(&gb);
dst16[1] = bytestream_get_be16(&src); dst16[1] = bytestream2_get_be16u(&gb);
dst16[2] = bytestream_get_be16(&src); dst16[2] = bytestream2_get_be16u(&gb);
dst16[6] = bytestream_get_be16(&src); dst16[6] = bytestream2_get_be16u(&gb);
dst16[4] = bytestream_get_be16(&src); dst16[4] = bytestream2_get_be16u(&gb);
dst16[5] = bytestream_get_be16(&src); dst16[5] = bytestream2_get_be16u(&gb);
dst16[7] = bytestream_get_be16(&src); dst16[7] = bytestream2_get_be16u(&gb);
dst16[3] = bytestream_get_be16(&src); dst16[3] = bytestream2_get_be16u(&gb);
dst16 += 8; dst16 += 8;
} while (--samples); } while (--samples);
} else { } else {
do { do {
dst32[0] = bytestream_get_be24(&src) << 8; dst32[0] = bytestream2_get_be24u(&gb) << 8;
dst32[1] = bytestream_get_be24(&src) << 8; dst32[1] = bytestream2_get_be24u(&gb) << 8;
dst32[2] = bytestream_get_be24(&src) << 8; dst32[2] = bytestream2_get_be24u(&gb) << 8;
dst32[6] = bytestream_get_be24(&src) << 8; dst32[6] = bytestream2_get_be24u(&gb) << 8;
dst32[4] = bytestream_get_be24(&src) << 8; dst32[4] = bytestream2_get_be24u(&gb) << 8;
dst32[5] = bytestream_get_be24(&src) << 8; dst32[5] = bytestream2_get_be24u(&gb) << 8;
dst32[7] = bytestream_get_be24(&src) << 8; dst32[7] = bytestream2_get_be24u(&gb) << 8;
dst32[3] = bytestream_get_be24(&src) << 8; dst32[3] = bytestream2_get_be24u(&gb) << 8;
dst32 += 8; dst32 += 8;
} while (--samples); } while (--samples);
} }
...@@ -306,7 +308,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data, ...@@ -306,7 +308,7 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
*got_frame_ptr = 1; *got_frame_ptr = 1;
*(AVFrame *)data = s->frame; *(AVFrame *)data = s->frame;
retval = src - avpkt->data; retval = bytestream2_tell(&gb);
if (avctx->debug & FF_DEBUG_BITSTREAM) if (avctx->debug & FF_DEBUG_BITSTREAM)
av_dlog(avctx, "pcm_bluray_decode_frame: decoded %d -> %d bytes\n", av_dlog(avctx, "pcm_bluray_decode_frame: decoded %d -> %d bytes\n",
retval, buf_size); retval, buf_size);
......
...@@ -21,19 +21,19 @@ ...@@ -21,19 +21,19 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "libavutil/intreadwrite.h" #include "libavcodec/bytestream.h"
#include "avcodec.h" #include "avcodec.h"
#include "s3tc.h" #include "s3tc.h"
static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, static inline void dxt1_decode_pixels(GetByteContext *gb, uint32_t *d,
unsigned int qstride, unsigned int flag, unsigned int qstride, unsigned int flag,
uint64_t alpha) { uint64_t alpha) {
unsigned int x, y, c0, c1, a = (!flag * 255u) << 24; unsigned int x, y, c0, c1, a = (!flag * 255u) << 24;
unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3; unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3;
uint32_t colors[4], pixels; uint32_t colors[4], pixels;
c0 = AV_RL16(s); c0 = bytestream2_get_le16(gb);
c1 = AV_RL16(s+2); c1 = bytestream2_get_le16(gb);
rb0 = (c0<<3 | c0<<8) & 0xf800f8; rb0 = (c0<<3 | c0<<8) & 0xf800f8;
rb1 = (c1<<3 | c1<<8) & 0xf800f8; rb1 = (c1<<3 | c1<<8) & 0xf800f8;
...@@ -61,7 +61,7 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, ...@@ -61,7 +61,7 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
colors[2] = rb2 + g2 + a; colors[2] = rb2 + g2 + a;
pixels = AV_RL32(s+4); pixels = bytestream2_get_le32(gb);
for (y=0; y<4; y++) { for (y=0; y<4; y++) {
for (x=0; x<4; x++) { for (x=0; x<4; x++) {
a = (alpha & 0x0f) << 28; a = (alpha & 0x0f) << 28;
...@@ -74,24 +74,24 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d, ...@@ -74,24 +74,24 @@ static inline void dxt1_decode_pixels(const uint8_t *s, uint32_t *d,
} }
} }
void ff_decode_dxt1(const uint8_t *s, uint8_t *dst, void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h, const unsigned int w, const unsigned int h,
const unsigned int stride) { const unsigned int stride) {
unsigned int bx, by, qstride = stride/4; unsigned int bx, by, qstride = stride/4;
uint32_t *d = (uint32_t *) dst; uint32_t *d = (uint32_t *) dst;
for (by=0; by < h/4; by++, d += stride-w) for (by=0; by < h/4; by++, d += stride-w)
for (bx=0; bx < w/4; bx++, s+=8, d+=4) for (bx = 0; bx < w / 4; bx++, d += 4)
dxt1_decode_pixels(s, d, qstride, 0, 0LL); dxt1_decode_pixels(gb, d, qstride, 0, 0LL);
} }
void ff_decode_dxt3(const uint8_t *s, uint8_t *dst, void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h, const unsigned int w, const unsigned int h,
const unsigned int stride) { const unsigned int stride) {
unsigned int bx, by, qstride = stride/4; unsigned int bx, by, qstride = stride/4;
uint32_t *d = (uint32_t *) dst; uint32_t *d = (uint32_t *) dst;
for (by=0; by < h/4; by++, d += stride-w) for (by=0; by < h/4; by++, d += stride-w)
for (bx=0; bx < w/4; bx++, s+=16, d+=4) for (bx = 0; bx < w / 4; bx++, d += 4)
dxt1_decode_pixels(s+8, d, qstride, 1, AV_RL64(s)); dxt1_decode_pixels(gb, d, qstride, 1, bytestream2_get_le64(gb));
} }
...@@ -29,24 +29,24 @@ ...@@ -29,24 +29,24 @@
/** /**
* Decode DXT1 encoded data to RGB32 * Decode DXT1 encoded data to RGB32
* @param src source buffer, has to be aligned on a 4-byte boundary * @param gb GetByteContext
* @param dst destination buffer * @param dst destination buffer
* @param w width of output image * @param w width of output image
* @param h height of output image * @param h height of output image
* @param stride line size of output image * @param stride line size of output image
*/ */
void ff_decode_dxt1(const uint8_t *src, uint8_t *dst, void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h, const unsigned int w, const unsigned int h,
const unsigned int stride); const unsigned int stride);
/** /**
* Decode DXT3 encoded data to RGB32 * Decode DXT3 encoded data to RGB32
* @param src source buffer, has to be aligned on a 4-byte boundary * @param gb GetByteContext
* @param dst destination buffer * @param dst destination buffer
* @param w width of output image * @param w width of output image
* @param h height of output image * @param h height of output image
* @param stride line size of output image * @param stride line size of output image
*/ */
void ff_decode_dxt3(const uint8_t *src, uint8_t *dst, void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
const unsigned int w, const unsigned int h, const unsigned int w, const unsigned int h,
const unsigned int stride); const unsigned int stride);
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "bytestream.h" #include "bytestream.h"
#include "avcodec.h" #include "avcodec.h"
#include "bytestream.h"
#include "s3tc.h" #include "s3tc.h"
typedef struct TXDContext { typedef struct TXDContext {
...@@ -42,28 +43,25 @@ static av_cold int txd_init(AVCodecContext *avctx) { ...@@ -42,28 +43,25 @@ static av_cold int txd_init(AVCodecContext *avctx) {
static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) { AVPacket *avpkt) {
const uint8_t *buf = avpkt->data;
const uint8_t *buf_end = avpkt->data + avpkt->size;
TXDContext * const s = avctx->priv_data; TXDContext * const s = avctx->priv_data;
GetByteContext gb;
AVFrame *picture = data; AVFrame *picture = data;
AVFrame * const p = &s->picture; AVFrame * const p = &s->picture;
unsigned int version, w, h, d3d_format, depth, stride, mipmap_count, flags; unsigned int version, w, h, d3d_format, depth, stride, mipmap_count, flags;
unsigned int y, v; unsigned int y, v;
uint8_t *ptr; uint8_t *ptr;
const uint8_t *cur = buf;
const uint32_t *palette = (const uint32_t *)(cur + 88);
uint32_t *pal; uint32_t *pal;
if (buf_end - cur < 92) bytestream2_init(&gb, avpkt->data, avpkt->size);
return AVERROR_INVALIDDATA; version = bytestream2_get_le32(&gb);
version = AV_RL32(cur); bytestream2_skip(&gb, 72);
d3d_format = AV_RL32(cur+76); d3d_format = bytestream2_get_le32(&gb);
w = AV_RL16(cur+80); w = bytestream2_get_le16(&gb);
h = AV_RL16(cur+82); h = bytestream2_get_le16(&gb);
depth = AV_RL8 (cur+84); depth = bytestream2_get_byte(&gb);
mipmap_count = AV_RL8 (cur+85); mipmap_count = bytestream2_get_byte(&gb);
flags = AV_RL8 (cur+87); bytestream2_skip(&gb, 1);
cur += 92; flags = bytestream2_get_byte(&gb);
if (version < 8 || version > 9) { if (version < 8 || version > 9) {
av_log(avctx, AV_LOG_ERROR, "texture data version %i is unsupported\n", av_log(avctx, AV_LOG_ERROR, "texture data version %i is unsupported\n",
...@@ -73,12 +71,9 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -73,12 +71,9 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
if (depth == 8) { if (depth == 8) {
avctx->pix_fmt = PIX_FMT_PAL8; avctx->pix_fmt = PIX_FMT_PAL8;
if (buf_end - cur < 1024) } else if (depth == 16 || depth == 32) {
return AVERROR_INVALIDDATA;
cur += 1024;
} else if (depth == 16 || depth == 32)
avctx->pix_fmt = PIX_FMT_RGB32; avctx->pix_fmt = PIX_FMT_RGB32;
else { } else {
av_log(avctx, AV_LOG_ERROR, "depth of %i is unsupported\n", depth); av_log(avctx, AV_LOG_ERROR, "depth of %i is unsupported\n", depth);
return -1; return -1;
} }
...@@ -102,31 +97,32 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -102,31 +97,32 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
if (depth == 8) { if (depth == 8) {
pal = (uint32_t *) p->data[1]; pal = (uint32_t *) p->data[1];
for (y=0; y<256; y++) { for (y = 0; y < 256; y++) {
v = AV_RB32(palette+y); v = bytestream2_get_be32(&gb);
pal[y] = (v>>8) + (v<<24); pal[y] = (v >> 8) + (v << 24);
} }
if (buf_end - cur < w * h) if (bytestream2_get_bytes_left(&gb) < w * h)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
bytestream2_skip(&gb, 4);
for (y=0; y<h; y++) { for (y=0; y<h; y++) {
memcpy(ptr, cur, w); bytestream2_get_buffer(&gb, ptr, w);
ptr += stride; ptr += stride;
cur += w;
} }
} else if (depth == 16) { } else if (depth == 16) {
bytestream2_skip(&gb, 4);
switch (d3d_format) { switch (d3d_format) {
case 0: case 0:
if (!(flags & 1)) if (!(flags & 1))
goto unsupported; goto unsupported;
case FF_S3TC_DXT1: case FF_S3TC_DXT1:
if (buf_end - cur < (w/4) * (h/4) * 8) if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 8)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
ff_decode_dxt1(cur, ptr, w, h, stride); ff_decode_dxt1(&gb, ptr, w, h, stride);
break; break;
case FF_S3TC_DXT3: case FF_S3TC_DXT3:
if (buf_end - cur < (w/4) * (h/4) * 16) if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 16)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
ff_decode_dxt3(cur, ptr, w, h, stride); ff_decode_dxt3(&gb, ptr, w, h, stride);
break; break;
default: default:
goto unsupported; goto unsupported;
...@@ -135,12 +131,11 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -135,12 +131,11 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
switch (d3d_format) { switch (d3d_format) {
case 0x15: case 0x15:
case 0x16: case 0x16:
if (buf_end - cur < h * w * 4) if (bytestream2_get_bytes_left(&gb) < h * w * 4)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
for (y=0; y<h; y++) { for (y=0; y<h; y++) {
memcpy(ptr, cur, w*4); bytestream2_get_buffer(&gb, ptr, w * 4);
ptr += stride; ptr += stride;
cur += w*4;
} }
break; break;
default: default:
...@@ -148,17 +143,10 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, ...@@ -148,17 +143,10 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
} }
} }
for (; mipmap_count > 1 && buf_end - cur >= 4; mipmap_count--) {
uint32_t length = bytestream_get_le32(&cur);
if (buf_end - cur < length)
break;
cur += length;
}
*picture = s->picture; *picture = s->picture;
*data_size = sizeof(AVPicture); *data_size = sizeof(AVPicture);
return cur - buf; return avpkt->size;
unsupported: unsupported:
av_log(avctx, AV_LOG_ERROR, "unsupported d3d format (%08x)\n", d3d_format); av_log(avctx, AV_LOG_ERROR, "unsupported d3d format (%08x)\n", d3d_format);
......
...@@ -385,7 +385,7 @@ static int iff_read_packet(AVFormatContext *s, ...@@ -385,7 +385,7 @@ static int iff_read_packet(AVFormatContext *s,
AVInputFormat ff_iff_demuxer = { AVInputFormat ff_iff_demuxer = {
.name = "IFF", .name = "IFF",
.long_name = NULL_IF_CONFIG_SMALL("IFF format"), .long_name = NULL_IF_CONFIG_SMALL("Interchange File Format"),
.priv_data_size = sizeof(IffDemuxContext), .priv_data_size = sizeof(IffDemuxContext),
.read_probe = iff_probe, .read_probe = iff_probe,
.read_header = iff_read_header, .read_header = iff_read_header,
......
...@@ -1005,7 +1005,7 @@ start: ...@@ -1005,7 +1005,7 @@ start:
av_freep(content_ptr); av_freep(content_ptr);
/* If method is set, this is called from ff_rtsp_send_cmd, /* If method is set, this is called from ff_rtsp_send_cmd,
* where a reply to exactly this request is awaited. For * where a reply to exactly this request is awaited. For
* callers from within packet reciving, we just want to * callers from within packet receiving, we just want to
* return to the caller and go back to receiving packets. */ * return to the caller and go back to receiving packets. */
if (method) if (method)
goto start; goto start;
......
...@@ -77,7 +77,6 @@ OBJS = adler32.o \ ...@@ -77,7 +77,6 @@ OBJS = adler32.o \
tree.o \ tree.o \
utils.o \ utils.o \
OBJS-$(ARCH_ARM) += arm/cpu.o
OBJS-$(ARCH_PPC) += ppc/cpu.o OBJS-$(ARCH_PPC) += ppc/cpu.o
OBJS-$(ARCH_X86) += x86/cpu.o OBJS-$(ARCH_X86) += x86/cpu.o
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "config.h"
/**
 * Report the CPU feature flags for ARM.
 *
 * @return the product HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT, i.e. the IWMMXT
 *         flag scaled by the HAVE_IWMMXT configuration value
 *         (presumably 0 or 1, supplied by config.h -- confirm).
 */
int ff_get_cpu_flags_arm(void)
{
    const int arm_flags = HAVE_IWMMXT * AV_CPU_FLAG_IWMMXT;

    return arm_flags;
}
...@@ -31,7 +31,6 @@ int av_get_cpu_flags(void) ...@@ -31,7 +31,6 @@ int av_get_cpu_flags(void)
if (checked) if (checked)
return flags; return flags;
if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
if (ARCH_PPC) flags = ff_get_cpu_flags_ppc(); if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
if (ARCH_X86) flags = ff_get_cpu_flags_x86(); if (ARCH_X86) flags = ff_get_cpu_flags_x86();
...@@ -55,9 +54,7 @@ static const struct { ...@@ -55,9 +54,7 @@ static const struct {
int flag; int flag;
const char *name; const char *name;
} cpu_flag_tab[] = { } cpu_flag_tab[] = {
#if ARCH_ARM #if ARCH_PPC
{ AV_CPU_FLAG_IWMMXT, "iwmmxt" },
#elif ARCH_PPC
{ AV_CPU_FLAG_ALTIVEC, "altivec" }, { AV_CPU_FLAG_ALTIVEC, "altivec" },
#elif ARCH_X86 #elif ARCH_X86
{ AV_CPU_FLAG_MMX, "mmx" }, { AV_CPU_FLAG_MMX, "mmx" },
......
...@@ -43,7 +43,6 @@ ...@@ -43,7 +43,6 @@
#define AV_CPU_FLAG_CMOV 0x1000000 ///< supports cmov instruction #define AV_CPU_FLAG_CMOV 0x1000000 ///< supports cmov instruction
#define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions #define AV_CPU_FLAG_XOP 0x0400 ///< Bulldozer XOP functions
#define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions #define AV_CPU_FLAG_FMA4 0x0800 ///< Bulldozer FMA4 functions
#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT
#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
/** /**
...@@ -67,7 +66,6 @@ void av_force_cpu_flags(int flags); ...@@ -67,7 +66,6 @@ void av_force_cpu_flags(int flags);
attribute_deprecated void av_set_cpu_flags_mask(int mask); attribute_deprecated void av_set_cpu_flags_mask(int mask);
/* The following CPU-specific functions shall not be called directly. */ /* The following CPU-specific functions shall not be called directly. */
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void); int ff_get_cpu_flags_x86(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment