Commit 3b5946bc authored by Ben Avison's avatar Ben Avison Committed by Martin Storsjö

truehd: add hand-scheduled ARM asm version of ff_mlp_pack_output.

Profiling results for overall decode and the output_data function in
particular are as follows:

              Before          After
              Mean   StdDev   Mean   StdDev  Confidence  Change
6:2 total     339.6  15.1     329.3  16.0    95.8%       +3.1%  (insignificant)
6:2 function  24.6   6.0      9.9    3.1     100.0%      +148.5%
8:2 total     324.5  15.5     323.6  14.3    15.2%       +0.3%  (insignificant)
8:2 function  20.4   3.9      9.9    3.4     100.0%      +104.7%
6:6 total     572.8  20.6     539.9  24.2    100.0%      +6.1%
6:6 function  54.5   5.6      16.0   3.8     100.0%      +240.9%
8:8 total     741.5  21.2     702.5  18.5    100.0%      +5.6%
8:8 function  63.9   7.6      18.4   4.8     100.0%      +247.3%

The assembly version has also been tested with a fuzz tester to ensure that
any combinations of inputs not exercised by my available test streams still
generate mathematically identical results to the C version.
Signed-off-by: 's avatarMartin Storsjö <martin@martin.st>
parent b9eb0341
...@@ -51,6 +51,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ ...@@ -51,6 +51,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \
ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
arm/hpeldsp_armv6.o arm/hpeldsp_armv6.o
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
......
This diff is collapsed.
...@@ -41,6 +41,98 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples, ...@@ -41,6 +41,98 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples,
int access_unit_size_pow2, int access_unit_size_pow2,
int32_t mask); int32_t mask);
#define DECLARE_PACK(order,channels,shift) \
int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
#define ENUMERATE_PACK(order,channels,shift) \
ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6,
#define PACK_CHANNELS(macro,order,channels) \
macro(order,channels,0) \
macro(order,channels,1) \
macro(order,channels,2) \
macro(order,channels,3) \
macro(order,channels,4) \
macro(order,channels,5) \
macro(order,channels,mixed)
#define PACK_ORDER(macro,order) \
PACK_CHANNELS(macro,order,2) \
PACK_CHANNELS(macro,order,6) \
PACK_CHANNELS(macro,order,8)
#define PACK_ALL(macro) \
PACK_ORDER(macro,outof) \
PACK_ORDER(macro,in)
PACK_ALL(DECLARE_PACK)
#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0
#if CONFIG_THUMB
#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0
#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0
#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0
#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0
#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0
#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0
#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0
#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0
#endif
static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
int8_t *output_shift,
uint8_t max_matrix_channel,
int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
{
int ch_index;
int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0];
int inorder = 1;
static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = {
PACK_ALL(ENUMERATE_PACK)
};
int i;
if (!is32) // don't support 16-bit output (it's not used by TrueHD)
return ff_mlp_pack_output;
switch (max_matrix_channel) {
case 1:
ch_index = 0;
break;
case 5:
ch_index = 1;
break;
case 7:
ch_index = 2;
break;
default:
return ff_mlp_pack_output;
}
for (i = 0; i <= max_matrix_channel; i++) {
if (shift != 6 && output_shift[i] != shift)
shift = 6; // indicate mixed shifts
if (ch_assign[i] != i)
inorder = 0;
}
#if CONFIG_THUMB
if (!inorder)
return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode
#else
if (shift == 6 && !inorder)
return ff_mlp_pack_output; // can't currently handle both an order array and a shift array
#endif
return routine[(inorder*3+ch_index)*7+shift];
}
av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c) av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
...@@ -49,4 +141,6 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c) ...@@ -49,4 +141,6 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
c->mlp_filter_channel = ff_mlp_filter_channel_arm; c->mlp_filter_channel = ff_mlp_filter_channel_arm;
c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
} }
if (have_armv6(cpu_flags))
c->mlp_select_pack_output = mlp_select_pack_output_armv6;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment