Commit e7078e84 authored by Anton Khirnov

hevcdsp: add x86 SIMD for MC

parent 0cef06df
...@@ -38,9 +38,9 @@ ...@@ -38,9 +38,9 @@
#include "golomb.h" #include "golomb.h"
#include "hevc.h" #include "hevc.h"
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 }; const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 }; const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 4, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4] = { 0, 6, 7, 6 }; const uint8_t ff_hevc_qpel_extra[4] = { 0, 7, 7, 7 };
static const uint8_t scan_1x1[1] = { 0 }; static const uint8_t scan_1x1[1] = { 0 };
......
...@@ -740,7 +740,7 @@ typedef struct HEVCPredContext { ...@@ -740,7 +740,7 @@ typedef struct HEVCPredContext {
} HEVCPredContext; } HEVCPredContext;
typedef struct HEVCLocalContext { typedef struct HEVCLocalContext {
DECLARE_ALIGNED(16, int16_t, mc_buffer[(MAX_PB_SIZE + 7) * MAX_PB_SIZE]); DECLARE_ALIGNED(16, int16_t, mc_buffer[(MAX_PB_SIZE + 24) * MAX_PB_SIZE]);
uint8_t cabac_state[HEVC_CONTEXTS]; uint8_t cabac_state[HEVC_CONTEXTS];
uint8_t first_qp_group; uint8_t first_qp_group;
......
...@@ -89,7 +89,7 @@ static const int8_t transform[32][32] = { ...@@ -89,7 +89,7 @@ static const int8_t transform[32][32] = {
90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 },
}; };
DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = { DECLARE_ALIGNED(16, const int16_t, ff_hevc_epel_coeffs[7][16]) = {
{ -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2 }, { -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2 },
{ -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2 }, { -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2 },
{ -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4 }, { -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4 },
...@@ -99,6 +99,28 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = { ...@@ -99,6 +99,28 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
{ -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 }, { -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 },
}; };
/*
 * EPEL filter coefficients stored as 8-bit values.
 *
 * There are seven rows of 16 entries. Each row holds one 4-tap filter
 * written out four times back to back; the four taps of every filter
 * sum to 64. The table is declared with 16-byte alignment.
 *
 * NOTE(review): presumably this is the byte-sized counterpart of the
 * int16_t table ff_hevc_epel_coeffs, indexed the same way (fractional
 * position minus one) and consumed by the x86 SIMD code -- confirm
 * against the assembly before changing the layout.
 */
DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_coeffs8[7][16]) = {
{ -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2 },
{ -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2 },
{ -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4 },
{ -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4 },
{ -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6 },
{ -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4 },
{ -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 },
};
/*
 * QPEL filter coefficients stored as 16-bit values.
 *
 * There are three rows of eight taps, one 8-tap filter per row; the taps
 * of every filter sum to 64. The first and last rows are mirror images
 * of each other and the middle row is symmetric. The table is declared
 * with 16-byte alignment, so each row occupies exactly 16 bytes.
 *
 * NOTE(review): presumably row i corresponds to fractional position
 * i + 1 (quarter, half, three-quarter sample) -- confirm against the
 * callers, which are not visible here.
 */
DECLARE_ALIGNED(16, const int16_t, ff_hevc_qpel_coeffs[3][8]) = {
{ -1, 4, -10, 58, 17, -5, 1, 0 },
{ -1, 4, -11, 40, 40, -11, 4, -1 },
{ 0, 1, -5, 17, 58, -10, 4, -1 },
};
/*
 * QPEL filter coefficients stored as 8-bit values.
 *
 * There are three rows of 16 entries. Each row holds one 8-tap filter
 * written out twice back to back; the eight taps of every filter sum
 * to 64. The table is declared with 16-byte alignment.
 *
 * NOTE(review): presumably the duplicated layout lets SIMD code load a
 * full 16-byte row and filter two groups of samples at once -- confirm
 * against the x86 assembly before changing the layout.
 */
DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_coeffs8[3][16]) = {
{ -1, 4, -10, 58, 17, -5, 1, 0, -1, 4, -10, 58, 17, -5, 1, 0 },
{ -1, 4, -11, 40, 40, -11, 4, -1, -1, 4, -11, 40, 40, -11, 4, -1 },
{ 0, 1, -5, 17, 58, -10, 4, -1, 0, 1, -5, 17, 58, -10, 4, -1 },
};
#define BIT_DEPTH 8 #define BIT_DEPTH 8
#include "hevcdsp_template.c" #include "hevcdsp_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
......
...@@ -118,6 +118,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); ...@@ -118,6 +118,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth); void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
extern const int8_t ff_hevc_epel_filters[7][16]; extern const int16_t ff_hevc_epel_coeffs[7][16];
extern const int8_t ff_hevc_epel_coeffs8[7][16];
extern const int16_t ff_hevc_qpel_coeffs[3][8];
extern const int8_t ff_hevc_qpel_coeffs8[3][16];
#endif /* AVCODEC_HEVCDSP_H */ #endif /* AVCODEC_HEVCDSP_H */
...@@ -1018,7 +1018,7 @@ static inline void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride, ...@@ -1018,7 +1018,7 @@ static inline void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
int x, y; int x, y;
pixel *src = (pixel *)_src; pixel *src = (pixel *)_src;
ptrdiff_t srcstride = _srcstride / sizeof(pixel); ptrdiff_t srcstride = _srcstride / sizeof(pixel);
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; const int16_t *filter = ff_hevc_epel_coeffs[mx - 1];
int8_t filter_0 = filter[0]; int8_t filter_0 = filter[0];
int8_t filter_1 = filter[1]; int8_t filter_1 = filter[1];
int8_t filter_2 = filter[2]; int8_t filter_2 = filter[2];
...@@ -1040,7 +1040,7 @@ static inline void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride, ...@@ -1040,7 +1040,7 @@ static inline void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
int x, y; int x, y;
pixel *src = (pixel *)_src; pixel *src = (pixel *)_src;
ptrdiff_t srcstride = _srcstride / sizeof(pixel); ptrdiff_t srcstride = _srcstride / sizeof(pixel);
const int8_t *filter = ff_hevc_epel_filters[my - 1]; const int16_t *filter = ff_hevc_epel_coeffs[my - 1];
int8_t filter_0 = filter[0]; int8_t filter_0 = filter[0];
int8_t filter_1 = filter[1]; int8_t filter_1 = filter[1];
int8_t filter_2 = filter[2]; int8_t filter_2 = filter[2];
...@@ -1063,8 +1063,8 @@ static inline void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride, ...@@ -1063,8 +1063,8 @@ static inline void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
int x, y; int x, y;
pixel *src = (pixel *)_src; pixel *src = (pixel *)_src;
ptrdiff_t srcstride = _srcstride / sizeof(pixel); ptrdiff_t srcstride = _srcstride / sizeof(pixel);
const int8_t *filter_h = ff_hevc_epel_filters[mx - 1]; const int16_t *filter_h = ff_hevc_epel_coeffs[mx - 1];
const int8_t *filter_v = ff_hevc_epel_filters[my - 1]; const int16_t *filter_v = ff_hevc_epel_coeffs[my - 1];
int8_t filter_0 = filter_h[0]; int8_t filter_0 = filter_h[0];
int8_t filter_1 = filter_h[1]; int8_t filter_1 = filter_h[1];
int8_t filter_2 = filter_h[2]; int8_t filter_2 = filter_h[2];
......
...@@ -113,7 +113,8 @@ YASM-OBJS-$(CONFIG_VP8DSP) += x86/vp8dsp.o \ ...@@ -113,7 +113,8 @@ YASM-OBJS-$(CONFIG_VP8DSP) += x86/vp8dsp.o \
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_APE_DECODER) += x86/apedsp.o YASM-OBJS-$(CONFIG_APE_DECODER) += x86/apedsp.o
YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_deblock.o \
x86/hevc_mc.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment