Commit cac9877e authored by Michael Niedermayer's avatar Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  sws: implement MMX/SSE2/SSSE3/SSE4 versions for horizontal scaling.
  include stdint.h in adpcm_data.h
  mpeg12: reorder functions to avoid ugly forward declarations
  Fixed off by one packet size allocation in the smacker demuxer.
  Check for invalid packet size in the smacker demuxer.
  ape demuxer: fix segfault on memory allocation failure.
  xan: Add some buffer checks
  xan: Remove extra trailing newline
  Fixed size given to init_get_bits() in xan decoder.

Conflicts:
	libavcodec/mpeg12.c
	libswscale/x86/swscale_template.c
Merged-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parents 5ad01dec e0c3e073
......@@ -26,6 +26,8 @@
#ifndef AVCODEC_ADPCM_DATA_H
#define AVCODEC_ADPCM_DATA_H
#include <stdint.h>
extern const int8_t ff_adpcm_index_table[16];
extern const int16_t ff_adpcm_step_table[89];
extern const int16_t ff_adpcm_AdaptationTable[];
......
This diff is collapsed.
......@@ -97,17 +97,21 @@ static av_cold int xan_decode_init(AVCodecContext *avctx)
return 0;
}
static int xan_huffman_decode(unsigned char *dest, const unsigned char *src,
int dest_len)
static int xan_huffman_decode(unsigned char *dest, int dest_len,
const unsigned char *src, int src_len)
{
unsigned char byte = *src++;
unsigned char ival = byte + 0x16;
const unsigned char * ptr = src + byte*2;
int ptr_len = src_len - 1 - byte*2;
unsigned char val = ival;
unsigned char *dest_end = dest + dest_len;
GetBitContext gb;
init_get_bits(&gb, ptr, 0); // FIXME: no src size available
if (ptr_len < 0)
return AVERROR_INVALIDDATA;
init_get_bits(&gb, ptr, ptr_len * 8);
while ( val != 0x16 ) {
val = src[val - 0x17 + get_bits1(&gb) * byte];
......@@ -246,7 +250,7 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s,
}
}
static void xan_wc3_decode_frame(XanContext *s) {
static int xan_wc3_decode_frame(XanContext *s) {
int width = s->avctx->width;
int height = s->avctx->height;
......@@ -266,13 +270,30 @@ static void xan_wc3_decode_frame(XanContext *s) {
const unsigned char *size_segment;
const unsigned char *vector_segment;
const unsigned char *imagedata_segment;
int huffman_offset, size_offset, vector_offset, imagedata_offset;
if (s->size < 8)
return AVERROR_INVALIDDATA;
huffman_offset = AV_RL16(&s->buf[0]);
size_offset = AV_RL16(&s->buf[2]);
vector_offset = AV_RL16(&s->buf[4]);
imagedata_offset = AV_RL16(&s->buf[6]);
huffman_segment = s->buf + AV_RL16(&s->buf[0]);
size_segment = s->buf + AV_RL16(&s->buf[2]);
vector_segment = s->buf + AV_RL16(&s->buf[4]);
imagedata_segment = s->buf + AV_RL16(&s->buf[6]);
if (huffman_offset >= s->size ||
size_offset >= s->size ||
vector_offset >= s->size ||
imagedata_offset >= s->size)
return AVERROR_INVALIDDATA;
xan_huffman_decode(opcode_buffer, huffman_segment, opcode_buffer_size);
huffman_segment = s->buf + huffman_offset;
size_segment = s->buf + size_offset;
vector_segment = s->buf + vector_offset;
imagedata_segment = s->buf + imagedata_offset;
if (xan_huffman_decode(opcode_buffer, opcode_buffer_size,
huffman_segment, s->size - huffman_offset) < 0)
return AVERROR_INVALIDDATA;
if (imagedata_segment[0] == 2)
xan_unpack(s->buffer2, &imagedata_segment[1], s->buffer2_size);
......@@ -358,6 +379,7 @@ static void xan_wc3_decode_frame(XanContext *s) {
y += (x + size) / width;
x = (x + size) % width;
}
return 0;
}
#if RUNTIME_GAMMA
......@@ -519,7 +541,8 @@ static int xan_decode_frame(AVCodecContext *avctx,
s->buf = buf;
s->size = buf_size;
xan_wc3_decode_frame(s);
if (xan_wc3_decode_frame(s) < 0)
return AVERROR_INVALIDDATA;
/* release the last frame if it is allocated */
if (s->last_frame.data[0])
......@@ -563,4 +586,3 @@ AVCodec ff_xan_wc3_decoder = {
.capabilities = CODEC_CAP_DR1,
.long_name = NULL_IF_CONFIG_SMALL("Wing Commander III / Xan"),
};
......@@ -19,6 +19,7 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
OBJS-$(HAVE_YASM) += x86/scale.o
TESTPROGS = colorspace swscale
......
This diff is collapsed.
......@@ -176,6 +176,41 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
}
}
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
SwsContext *c, int16_t *data, \
int dstW, const uint8_t *src, \
const int16_t *filter, \
const int16_t *filterPos, int filterSize);
#define SCALE_FUNCS(filter_n, opt) \
SCALE_FUNC(filter_n, 8, 15, opt); \
SCALE_FUNC(filter_n, 9, 15, opt); \
SCALE_FUNC(filter_n, 10, 15, opt); \
SCALE_FUNC(filter_n, 16, 15, opt); \
SCALE_FUNC(filter_n, 8, 19, opt); \
SCALE_FUNC(filter_n, 9, 19, opt); \
SCALE_FUNC(filter_n, 10, 19, opt); \
SCALE_FUNC(filter_n, 16, 19, opt)
#define SCALE_FUNCS_MMX(opt) \
SCALE_FUNCS(4, opt); \
SCALE_FUNCS(8, opt); \
SCALE_FUNCS(X, opt)
#define SCALE_FUNCS_SSE(opt) \
SCALE_FUNCS(4, opt); \
SCALE_FUNCS(8, opt); \
SCALE_FUNCS(X4, opt); \
SCALE_FUNCS(X8, opt)
#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
......@@ -186,4 +221,55 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
if (cpu_flags & AV_CPU_FLAG_MMX2)
sws_init_swScale_MMX2(c);
#endif
#if HAVE_YASM
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 9) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 10) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
} else if(c->srcBpc == 16 && !((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
} \
} while (0)
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
switch (filtersize) { \
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
}
#if ARCH_X86_32
if (cpu_flags & AV_CPU_FLAG_MMX) {
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
}
#endif
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
switch (filtersize) { \
case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
break; \
}
if (cpu_flags & AV_CPU_FLAG_SSE2) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
}
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
}
if (cpu_flags & AV_CPU_FLAG_SSE4) {
/* Xto15 don't need special sse4 functions */
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
}
#endif
}
......@@ -1951,164 +1951,6 @@ static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
}
#if !COMPILE_TEMPLATE_MMX2
// bilinear / bicubic scaling
static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
const int16_t *filterPos, int filterSize)
{
assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
x86_reg counter= -2*dstW;
filter-= counter*2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
"movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
"movd (%3, %%"REG_a"), %%mm0 \n\t"
"movd (%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (filterSize==8) {
x86_reg counter= -2*dstW;
filter-= counter*4;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"pxor %%mm7, %%mm7 \n\t"
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
"movd (%3, %%"REG_a"), %%mm0 \n\t"
"movd (%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
"movd 4(%3, %%"REG_a"), %%mm4 \n\t"
"movd 4(%3, %%"REG_b"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad $7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else {
const uint8_t *offset = src+filterSize;
x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
".p2align 4 \n\t"
"1: \n\t"
"mov %2, %%"REG_c" \n\t"
"movzwl (%%"REG_c", %0), %%eax \n\t"
"movzwl 2(%%"REG_c", %0), %%edx \n\t"
"mov %5, %%"REG_c" \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t"
"2: \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1, %6), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_a"), %%mm0 \n\t"
"movd (%%"REG_c", %%"REG_d"), %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"paddd %%mm3, %%mm5 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"add $8, %1 \n\t"
"add $4, %%"REG_c" \n\t"
"cmp %4, %%"REG_c" \n\t"
" jb 2b \n\t"
"add %6, %1 \n\t"
"movq %%mm4, %%mm0 \n\t"
"punpckldq %%mm5, %%mm4 \n\t"
"punpckhdq %%mm5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"psrad $7, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"mov %3, %%"REG_a" \n\t"
"movd %%mm4, (%%"REG_a", %0) \n\t"
"add $4, %0 \n\t"
" jnc 1b \n\t"
: "+r" (counter), "+r" (filter)
: "m" (filterPos), "m" (dst), "m"(offset),
"m" (src), "r" ((x86_reg)filterSize*2)
: "%"REG_a, "%"REG_c, "%"REG_d
);
}
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
......@@ -2265,7 +2107,6 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,
}
}
#if COMPILE_TEMPLATE_MMX2
static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
int dstWidth, const uint8_t *src,
......@@ -2466,10 +2307,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
if (c->srcBpc == 8 && c->dstBpc <= 10) {
#if !COMPILE_TEMPLATE_MMX2
c->hyScale = c->hcScale = RENAME(hScale );
#endif /* !COMPILE_TEMPLATE_MMX2 */
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMX2
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment