Commit 353aecbb authored by Martin Storsjö

pixblockdsp, avdct: Add get_pixels_unaligned

Use this in vf_spp.c, where the get_pixels operation is done on
unaligned source addresses.

Hook up the x86 (mmx and sse) versions of get_pixels to this
function pointer, as those implementations seem to support unaligned
use.

This fixes fate-filter-spp on armv7.
Signed-off-by: Martin Storsjö <martin@martin.st>
parent b12b0537
...@@ -120,6 +120,7 @@ int avcodec_dct_init(AVDCT *dsp) ...@@ -120,6 +120,7 @@ int avcodec_dct_init(AVDCT *dsp)
PixblockDSPContext pdsp; PixblockDSPContext pdsp;
ff_pixblockdsp_init(&pdsp, avctx); ff_pixblockdsp_init(&pdsp, avctx);
COPY(pdsp, get_pixels); COPY(pdsp, get_pixels);
COPY(pdsp, get_pixels_unaligned);
} }
#endif #endif
......
...@@ -67,6 +67,10 @@ typedef struct AVDCT { ...@@ -67,6 +67,10 @@ typedef struct AVDCT {
ptrdiff_t line_size); ptrdiff_t line_size);
int bits_per_sample; int bits_per_sample;
void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
const uint8_t *pixels,
ptrdiff_t line_size);
} AVDCT; } AVDCT;
/** /**
......
...@@ -90,10 +90,12 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) ...@@ -90,10 +90,12 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
case 10: case 10:
case 12: case 12:
case 14: case 14:
c->get_pixels_unaligned =
c->get_pixels = get_pixels_16_c; c->get_pixels = get_pixels_16_c;
break; break;
default: default:
if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
c->get_pixels_unaligned =
c->get_pixels = get_pixels_8_c; c->get_pixels = get_pixels_8_c;
} }
break; break;
......
...@@ -29,6 +29,9 @@ typedef struct PixblockDSPContext { ...@@ -29,6 +29,9 @@ typedef struct PixblockDSPContext {
void (*get_pixels)(int16_t *av_restrict block /* align 16 */, void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
const uint8_t *pixels /* align 8 */, const uint8_t *pixels /* align 8 */,
ptrdiff_t stride); ptrdiff_t stride);
void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
const uint8_t *pixels,
ptrdiff_t stride);
void (*diff_pixels)(int16_t *av_restrict block /* align 16 */, void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
const uint8_t *s1 /* align 8 */, const uint8_t *s1 /* align 8 */,
const uint8_t *s2 /* align 8 */, const uint8_t *s2 /* align 8 */,
......
...@@ -37,15 +37,19 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, ...@@ -37,15 +37,19 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) { if (EXTERNAL_MMX(cpu_flags)) {
if (!high_bit_depth) if (!high_bit_depth) {
c->get_pixels_unaligned =
c->get_pixels = ff_get_pixels_mmx; c->get_pixels = ff_get_pixels_mmx;
}
c->diff_pixels_unaligned = c->diff_pixels_unaligned =
c->diff_pixels = ff_diff_pixels_mmx; c->diff_pixels = ff_diff_pixels_mmx;
} }
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
if (!high_bit_depth) if (!high_bit_depth) {
c->get_pixels_unaligned =
c->get_pixels = ff_get_pixels_sse2; c->get_pixels = ff_get_pixels_sse2;
}
c->diff_pixels_unaligned = c->diff_pixels_unaligned =
c->diff_pixels = ff_diff_pixels_sse2; c->diff_pixels = ff_diff_pixels_sse2;
} }
......
...@@ -283,7 +283,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, ...@@ -283,7 +283,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
const int x1 = x + offset[i + count - 1][0]; const int x1 = x + offset[i + count - 1][0];
const int y1 = y + offset[i + count - 1][1]; const int y1 = y + offset[i + count - 1][1];
const int index = x1 + y1*linesize; const int index = x1 + y1*linesize;
p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize); p->dct->get_pixels_unaligned(block, p->src + sample_bytes*index, sample_bytes*linesize);
p->dct->fdct(block); p->dct->fdct(block);
p->requantize(block2, block, qp, p->dct->idct_permutation); p->requantize(block2, block, qp, p->dct->idct_permutation);
p->dct->idct(block2); p->dct->idct(block2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment