Commit 35af7add authored by Paul B Mahol

avcodec/takdec: add x86 SIMD for rest of decorrelation modes

Signed-off-by: Paul B Mahol <onemda@gmail.com>
parent 2f4374fa
...@@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \ ...@@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \
h263.o ituh263enc.o h263.o ituh263enc.o
OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o
OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o
OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o takdsp.o
OBJS-$(CONFIG_TARGA_DECODER) += targa.o OBJS-$(CONFIG_TARGA_DECODER) += targa.o
OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o
OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/samplefmt.h" #include "libavutil/samplefmt.h"
#include "tak.h" #include "tak.h"
#include "takdsp.h"
#include "audiodsp.h" #include "audiodsp.h"
#include "thread.h" #include "thread.h"
#include "avcodec.h" #include "avcodec.h"
...@@ -47,6 +48,7 @@ typedef struct MCDParam { ...@@ -47,6 +48,7 @@ typedef struct MCDParam {
typedef struct TAKDecContext { typedef struct TAKDecContext {
AVCodecContext *avctx; ///< parent AVCodecContext AVCodecContext *avctx; ///< parent AVCodecContext
AudioDSPContext adsp; AudioDSPContext adsp;
TAKDSPContext tdsp;
TAKStreamInfo ti; TAKStreamInfo ti;
GetBitContext gb; ///< bitstream reader initialized to start at the current frame GetBitContext gb; ///< bitstream reader initialized to start at the current frame
...@@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx) ...@@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
TAKDecContext *s = avctx->priv_data; TAKDecContext *s = avctx->priv_data;
ff_audiodsp_init(&s->adsp); ff_audiodsp_init(&s->adsp);
ff_takdsp_init(&s->tdsp);
s->avctx = avctx; s->avctx = avctx;
avctx->bits_per_raw_sample = avctx->bits_per_coded_sample; avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
...@@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan) ...@@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan)
static int decorrelate(TAKDecContext *s, int c1, int c2, int length) static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
{ {
GetBitContext *gb = &s->gb; GetBitContext *gb = &s->gb;
int32_t *p1 = s->decoded[c1] + 1; int32_t *p1 = s->decoded[c1] + (s->dmode > 5);
int32_t *p2 = s->decoded[c2] + 1; int32_t *p2 = s->decoded[c2] + (s->dmode > 5);
int32_t bp1 = p1[0];
int32_t bp2 = p2[0];
int i; int i;
int dshift, dfactor; int dshift, dfactor;
length += s->dmode < 6;
switch (s->dmode) { switch (s->dmode) {
case 1: /* left/side */ case 1: /* left/side */
for (i = 0; i < length; i++) { s->tdsp.decorrelate_ls(p1, p2, length);
int32_t a = p1[i];
int32_t b = p2[i];
p2[i] = a + b;
}
break; break;
case 2: /* side/right */ case 2: /* side/right */
for (i = 0; i < length; i++) { s->tdsp.decorrelate_sr(p1, p2, length);
int32_t a = p1[i];
int32_t b = p2[i];
p1[i] = b - a;
}
break; break;
case 3: /* side/mid */ case 3: /* side/mid */
for (i = 0; i < length; i++) { s->tdsp.decorrelate_sm(p1, p2, length);
int32_t a = p1[i];
int32_t b = p2[i];
a -= b >> 1;
p1[i] = a;
p2[i] = a + b;
}
break; break;
case 4: /* side/left with scale factor */ case 4: /* side/left with scale factor */
FFSWAP(int32_t*, p1, p2); FFSWAP(int32_t*, p1, p2);
FFSWAP(int32_t, bp1, bp2);
case 5: /* side/right with scale factor */ case 5: /* side/right with scale factor */
dshift = get_bits_esc4(gb); dshift = get_bits_esc4(gb);
dfactor = get_sbits(gb, 10); dfactor = get_sbits(gb, 10);
for (i = 0; i < length; i++) { s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor);
int32_t a = p1[i];
int32_t b = p2[i];
b = dfactor * (b >> dshift) + 128 >> 8 << dshift;
p1[i] = b - a;
}
break; break;
case 6: case 6:
FFSWAP(int32_t*, p1, p2); FFSWAP(int32_t*, p1, p2);
...@@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) ...@@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
} }
} }
if (s->dmode > 0 && s->dmode < 6) {
p1[0] = bp1;
p2[0] = bp2;
}
return 0; return 0;
} }
......
/*
* TAK decoder
* Copyright (c) 2015 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "takdsp.h"
#include "config.h"
/**
 * Left/side decorrelation: p2[i] = p1[i] + p2[i].
 *
 * The samples come from the bitstream, so the sum may exceed the int32_t
 * range. The addition is done on uint32_t to get well-defined wraparound
 * instead of signed-overflow undefined behaviour.
 *
 * @param p1     first channel (read only here)
 * @param p2     second channel, overwritten with the sum
 * @param length number of samples to process
 */
static void decorrelate_ls(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];
        p2[i]      = a + b;
    }
}
/**
 * Side/right decorrelation: p1[i] = p2[i] - p1[i].
 *
 * The subtraction is performed on uint32_t so that out-of-range results
 * wrap around instead of triggering signed-overflow undefined behaviour.
 *
 * @param p1     first channel, overwritten with the difference
 * @param p2     second channel (read only here)
 * @param length number of samples to process
 */
static void decorrelate_sr(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];
        p1[i]      = b - a;
    }
}
/**
 * Side/mid decorrelation, in place on both channels:
 *   p1[i] = p1[i] - (p2[i] >> 1)
 *   p2[i] = (new p1[i]) + p2[i]
 *
 * The shift stays on a signed value (arithmetic shift), while the
 * additions/subtractions are carried out on uint32_t so overflow wraps
 * instead of being undefined behaviour.
 *
 * @param p1     first channel, rewritten in place
 * @param p2     second channel, rewritten in place
 * @param length number of samples to process
 */
static void decorrelate_sm(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        int32_t  b = p2[i];
        a         -= b >> 1;
        p1[i]      = a;
        p2[i]      = a + b;
    }
}
/**
 * Decorrelation with scale factor:
 *   p1[i] = ((dfactor * (p2[i] >> dshift) + 128) >> 8 << dshift) - p1[i]
 *
 * Both right shifts are arithmetic (performed on signed values). The
 * multiply, add, left shift and final subtraction are done on unsigned
 * values so that overflow and shifting of negative intermediates wrap
 * instead of invoking undefined behaviour.
 *
 * @param p1      channel that is overwritten with the result
 * @param p2      channel that is scaled (read only here)
 * @param length  number of samples to process
 * @param dshift  shift applied before and after scaling
 * @param dfactor signed scale factor (8 fractional bits, rounded via +128)
 */
static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        int32_t  b = p2[i];
        b          = (unsigned)((int)(dfactor * (unsigned)(b >> dshift) + 128) >> 8) << dshift;
        p1[i]      = b - a;
    }
}
/**
 * Fill a TAKDSPContext with decorrelation routines.
 *
 * Every entry is first pointed at the portable C implementation from this
 * file; on x86 builds ff_takdsp_init_x86() is then given the chance to
 * replace entries with optimized versions.
 *
 * @param c context to initialize
 */
av_cold void ff_takdsp_init(TAKDSPContext *c)
{
    /* C reference implementations. */
    c->decorrelate_sf = decorrelate_sf;
    c->decorrelate_sm = decorrelate_sm;
    c->decorrelate_sr = decorrelate_sr;
    c->decorrelate_ls = decorrelate_ls;

    /* Architecture-specific overrides. */
    if (ARCH_X86) {
        ff_takdsp_init_x86(c);
    }
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_TAKDSP_H
#define AVCODEC_TAKDSP_H
#include <stdint.h>
/**
 * Table of channel decorrelation routines used by the TAK decoder.
 *
 * Each routine processes `length` samples of the two channel buffers
 * p1 and p2 in place.
 */
typedef struct TAKDSPContext {
/** Left/side: p2[i] = p1[i] + p2[i]. */
void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length);
/** Side/right: p1[i] = p2[i] - p1[i]. */
void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length);
/** Side/mid: p1[i] -= p2[i] >> 1, then p2[i] += p1[i]. */
void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length);
/**
 * Scaled: p1[i] = ((dfactor * (p2[i] >> dshift) + 128) >> 8 << dshift) - p1[i].
 */
void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
} TAKDSPContext;
/**
 * Initialize all function pointers of a TAKDSPContext.
 *
 * @param c context to fill in
 */
void ff_takdsp_init(TAKDSPContext *c);
/**
 * x86-specific part of the initialization; may replace entries of an
 * already initialized context depending on the detected CPU flags.
 *
 * @param c context whose entries may be overridden
 */
void ff_takdsp_init_x86(TAKDSPContext *c);
#endif /* AVCODEC_TAKDSP_H */
...@@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o ...@@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
...@@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o ...@@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
......
;******************************************************************************
;* TAK DSP SIMD optimizations
;*
;* Copyright (C) 2015 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
pd_128: times 4 dd 128
SECTION .text
INIT_XMM sse2
; void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length)
;
; Left/side decorrelation: p2[i] = p1[i] + p2[i].
;
; The sample count is converted to a byte count, both pointers are moved to
; the end of their buffers and the negated byte count is used as an index
; that counts up towards zero. Each iteration handles two vector registers
; (mmsize*2 bytes) per channel, and the loop body always runs at least once.
; NOTE(review): mova is the aligned move, so the buffers presumably must be
; mmsize-aligned and padded to a multiple of mmsize*2 bytes - confirm
; against the caller.
cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
; length (samples) -> length (bytes)
shl lengthd, 2
add p1q, lengthq
add p2q, lengthq
neg lengthq
.loop:
mova m0, [p1q+lengthq+mmsize*0]
mova m1, [p1q+lengthq+mmsize*1]
paddd m0, [p2q+lengthq+mmsize*0]
paddd m1, [p2q+lengthq+mmsize*1]
; the sums are stored to the p2 buffer
mova [p2q+lengthq+mmsize*0], m0
mova [p2q+lengthq+mmsize*1], m1
add lengthq, mmsize*2
; continue while the (negative) byte index has not reached zero
jl .loop
REP_RET
; void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length)
;
; Side/right decorrelation: p1[i] = p2[i] - p1[i].
;
; Same addressing scheme as the other routines in this file: byte count,
; pointers advanced to the buffer ends, negative index counting up to zero,
; two vector registers (mmsize*2 bytes) per channel per iteration.
; NOTE(review): mova is the aligned move, so the buffers presumably must be
; mmsize-aligned and padded to a multiple of mmsize*2 bytes - confirm
; against the caller.
cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
; length (samples) -> length (bytes)
shl lengthd, 2
add p1q, lengthq
add p2q, lengthq
neg lengthq
.loop:
mova m0, [p2q+lengthq+mmsize*0]
mova m1, [p2q+lengthq+mmsize*1]
psubd m0, [p1q+lengthq+mmsize*0]
psubd m1, [p1q+lengthq+mmsize*1]
; the differences are stored to the p1 buffer
mova [p1q+lengthq+mmsize*0], m0
mova [p1q+lengthq+mmsize*1], m1
add lengthq, mmsize*2
; continue while the (negative) byte index has not reached zero
jl .loop
REP_RET
; void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length)
;
; Side/mid decorrelation, in place on both channels:
;   p1[i] = p1[i] - (p2[i] >> 1)
;   p2[i] = (new p1[i]) + p2[i]
;
; The samples are signed, so the ">> 1" must be an arithmetic shift (psrad)
; to match the C reference; a logical shift (psrld) gives wrong results for
; negative p2 samples.
;
; Each iteration handles two vector registers (mmsize*2 bytes) per channel
; using aligned moves.
cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
    shl                  lengthd, 2          ; samples -> bytes
    add                      p1q, lengthq    ; point past the end of both buffers
    add                      p2q, lengthq
    neg                  lengthq             ; negative index counting up to zero

.loop:
    mova                      m0, [p1q+lengthq]
    mova                      m1, [p2q+lengthq]
    mova                      m3, [p1q+lengthq+mmsize]
    mova                      m4, [p2q+lengthq+mmsize]
    mova                      m2, m1
    mova                      m5, m4
    psrad                     m2, 1          ; signed p2 >> 1
    psrad                     m5, 1
    psubd                     m0, m2         ; p1 -= p2 >> 1
    psubd                     m3, m5
    paddd                     m1, m0         ; p2 += new p1
    paddd                     m4, m3
    mova         [p1q+lengthq], m0
    mova         [p2q+lengthq], m1
    mova  [p1q+lengthq+mmsize], m3
    mova  [p2q+lengthq+mmsize], m4
    add                  lengthq, mmsize*2
    jl .loop
    REP_RET
INIT_XMM sse4
; void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length,
;                                 int dshift, int dfactor)
;
; Decorrelation with scale factor:
;   p1[i] = ((dfactor * (p2[i] >> dshift) + 128) >> 8 << dshift) - p1[i]
;
; Both right shifts act on signed values in the C reference, so they must be
; arithmetic (psrad); logical shifts (psrld) give wrong results whenever the
; sample or the scaled product is negative.
;
; One vector register (mmsize bytes) per channel is processed per iteration
; using aligned moves. pmulld is what requires SSE4.
cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
    shl             lengthd, 2           ; samples -> bytes
    add                 p1q, lengthq     ; point past the end of both buffers
    add                 p2q, lengthq
    neg             lengthq              ; negative index counting up to zero

    movd                 m2, dshiftm     ; shift count in the low element
    movd                 m3, dfactorm
    pshufd               m3, m3, 0       ; broadcast dfactor to all lanes
    mova                 m4, [pd_128]    ; rounding constant

.loop:
    mova                 m0, [p1q+lengthq]
    mova                 m1, [p2q+lengthq]
    psrad                m1, m2          ; signed p2 >> dshift
    pmulld               m1, m3          ; * dfactor
    paddd                m1, m4          ; + 128
    psrad                m1, 8           ; signed >> 8
    pslld                m1, m2          ; << dshift
    psubd                m1, m0          ; - p1
    mova  [p1q+lengthq], m1
    add             lengthq, mmsize
    jl .loop
    REP_RET
/*
* Copyright (c) 2015 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/takdsp.h"
#include "libavutil/x86/cpu.h"
#include "config.h"
/* Prototypes of the external assembly routines assigned to TAKDSPContext in
 * ff_takdsp_init_x86(); the name suffix is the instruction set the routine
 * was assembled for. */
void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
/**
 * Replace entries of a TAKDSPContext with x86 assembly versions.
 *
 * Nothing is changed unless the build has HAVE_YASM set. Otherwise the
 * ls/sr/sm routines are overridden when external SSE2 is available and the
 * sf routine when external SSE4 is available.
 *
 * @param c context whose function pointers may be overridden
 */
av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
{
#if HAVE_YASM
    const int flags = av_get_cpu_flags();

    /* The scale-factor routine is the only one that needs SSE4. */
    if (EXTERNAL_SSE4(flags))
        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;

    if (EXTERNAL_SSE2(flags)) {
        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
    }
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment