/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_mix.h"

/*
 * Prototypes for the fixed 2-to-1 and 1-to-2 mixing routines.
 *
 * Only the declarations live here; the definitions are not part of this
 * file (presumably external x86 assembly -- confirm against the build).
 * Each name encodes the routine's variant: the two channel counts, the
 * sample format tag (fltp / s16p), the coefficient type tag (flt / q8) and
 * the instruction-set suffix.  ff_audio_mix_init_x86() registers them under
 * the matching AV_SAMPLE_FMT_* / AV_MIX_COEFF_TYPE_* pair.
 */

/* 2 -> 1, fltp samples, flt coefficients */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 2 -> 1, s16p samples, flt coefficients */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);

/* 2 -> 1, s16p samples, q8 coefficients (note the int16_t matrix) */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);

/* 1 -> 2, fltp samples, flt coefficients */
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 1 -> 2, s16p samples, flt coefficients */
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix, int len,
                                int out_ch, int in_ch);

/*
 * Declare every "<chan> to 1" and "<chan> to 2" mixing routine for one
 * channel count.
 *
 * chan must be a bare integer token because it is pasted into the function
 * names with ##.  For each of the two output counts the macro declares:
 *   - fltp/flt variants for sse, avx and fma4
 *   - s16p/flt variants for sse2, sse4, avx and fma4
 * i.e. 14 prototypes per expansion.  The final prototype carries its own
 * terminating ';', so the macro is used without a trailing semicolon.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                     \
void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);

/* Emit the prototypes for 3 through 8 input channels. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)

/*
 * Register the "<chan> to 1" and "<chan> to 2" mixing routines declared by
 * DEFINE_MIX_3_8_TO_1_2(chan) through ff_audio_mix_set_func().
 *
 * The expansion is a sequence of plain if-blocks (not wrapped in
 * do { } while (0)) and reads two identifiers from the expansion site:
 *   am        - passed as the first argument of every registration call
 *   cpu_flags - tested with the EXTERNAL_*() macros
 * chan must be a bare integer token; it is both pasted into function names
 * and compared numerically.
 *
 * Registrations are issued in the order SSE, SSE2, SSE4, AVX, FMA4, each
 * guarded by its own EXTERNAL_*() check.  For the AVX and FMA4 fltp
 * variants the two alignment arguments are 32/8, lowered to 16/4 when
 * ARCH_X86_32 is set or chan >= 6; the s16p variants always pass 16/8.
 * NOTE(review): later registrations presumably take precedence over earlier
 * ones for the same format -- confirm in ff_audio_mix_set_func().
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(cpu_flags)) {                                          \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_sse);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_sse);       \
    }                                                                       \
    if (EXTERNAL_SSE2(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse2);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse2);      \
    }                                                                       \
    if (EXTERNAL_SSE4(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse4);      \
    }                                                                       \
    if (EXTERNAL_AVX(cpu_flags)) {                                          \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_1_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_2_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_1_s16p_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_2_s16p_flt_avx);       \
    }                                                                       \
    if (EXTERNAL_FMA4(cpu_flags)) {                                         \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_fma4);      \
    }

/**
 * Register the x86 mixing routines on an AudioMix context.
 *
 * Reads the CPU flags once via av_get_cpu_flags(), then calls
 * ff_audio_mix_set_func() for every routine whose EXTERNAL_*() check
 * passes: first the fixed 2-to-1 / 1-to-2 routines, then the 3..8 channel
 * families via SET_MIX_3_8_TO_1_2().
 *
 * @param am  context handed unchanged to each ff_audio_mix_set_func() call
 */
av_cold void ff_audio_mix_init_x86(AudioMix *am)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
        /* the only Q8-coefficient routine registered by this function */
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
    }
    /* the fltp AVX routines are gated on EXTERNAL_AVX_FAST and pass 32 as
     * the first alignment argument; the s16p one below uses plain
     * EXTERNAL_AVX and passes 16 */
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
    }

    /* each expansion is a run of if-blocks, so no ';' follows the macro */
    SET_MIX_3_8_TO_1_2(3)
    SET_MIX_3_8_TO_1_2(4)
    SET_MIX_3_8_TO_1_2(5)
    SET_MIX_3_8_TO_1_2(6)
    SET_MIX_3_8_TO_1_2(7)
    SET_MIX_3_8_TO_1_2(8)
}