Commit 94723083 authored by Clément Bœsch's avatar Clément Bœsch

Merge commit '112cee02'

* commit '112cee02':
  hevc: Add SSE2 and AVX IDCT
Merged-by: 's avatarClément Bœsch <u@pkh.me>
parents 05510ec0 112cee02
......@@ -110,3 +110,4 @@ Extra changes needed to be aligned with Libav:
- Switching our examples to the new encode/decode API (see 67d28f4a0f)
- AC3 speed-up for our fixed version (see a9ba59591e)
- HEVC IDCT bit depth 12-bit support (Libav added 8 and 10 but doesn't have 12)
This diff is collapsed.
......@@ -58,18 +58,31 @@ LFL_FUNCS(uint8_t, 8, avx)
LFL_FUNCS(uint8_t, 10, avx)
LFL_FUNCS(uint8_t, 12, avx)
#define IDCT_FUNCS(W, opt) \
#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
IDCT_FUNCS(4x4, mmxext);
IDCT_FUNCS(8x8, mmxext);
IDCT_FUNCS(8x8, sse2);
IDCT_FUNCS(16x16, sse2);
IDCT_FUNCS(32x32, sse2);
IDCT_FUNCS(16x16, avx2);
IDCT_FUNCS(32x32, avx2);
IDCT_DC_FUNCS(4x4, mmxext);
IDCT_DC_FUNCS(8x8, mmxext);
IDCT_DC_FUNCS(8x8, sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);
IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);
#define IDCT_FUNCS(opt) \
void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
......@@ -709,6 +722,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
c->idct[2] = ff_hevc_idct_16x16_8_sse2;
c->idct[3] = ff_hevc_idct_32x32_8_sse2;
}
SAO_BAND_INIT(8, sse2);
......@@ -716,6 +731,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
c->idct[0] = ff_hevc_idct_4x4_8_sse2;
c->idct[1] = ff_hevc_idct_8x8_8_sse2;
c->add_residual[1] = ff_hevc_add_residual8_8_sse2;
c->add_residual[2] = ff_hevc_add_residual16_8_sse2;
c->add_residual[3] = ff_hevc_add_residual32_8_sse2;
......@@ -745,9 +763,15 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
c->idct[2] = ff_hevc_idct_16x16_8_avx;
c->idct[3] = ff_hevc_idct_32x32_8_avx;
}
SAO_BAND_INIT(8, avx);
c->idct[0] = ff_hevc_idct_4x4_8_avx;
c->idct[1] = ff_hevc_idct_8x8_8_avx;
c->add_residual[1] = ff_hevc_add_residual8_8_avx;
c->add_residual[2] = ff_hevc_add_residual16_8_avx;
c->add_residual[3] = ff_hevc_add_residual32_8_avx;
......@@ -864,6 +888,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
c->idct[2] = ff_hevc_idct_16x16_10_sse2;
c->idct[3] = ff_hevc_idct_32x32_10_sse2;
}
SAO_BAND_INIT(10, sse2);
SAO_EDGE_INIT(10, sse2);
......@@ -872,6 +899,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
c->idct[0] = ff_hevc_idct_4x4_10_sse2;
c->idct[1] = ff_hevc_idct_8x8_10_sse2;
c->add_residual[1] = ff_hevc_add_residual8_10_sse2;
c->add_residual[2] = ff_hevc_add_residual16_10_sse2;
c->add_residual[3] = ff_hevc_add_residual32_10_sse2;
......@@ -897,7 +927,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
c->idct[2] = ff_hevc_idct_16x16_10_avx;
c->idct[3] = ff_hevc_idct_32x32_10_avx;
}
c->idct[0] = ff_hevc_idct_4x4_10_avx;
c->idct[1] = ff_hevc_idct_8x8_10_avx;
SAO_BAND_INIT(10, avx);
}
if (EXTERNAL_AVX2(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment