Commit 3f07f12f authored by Kieran Kunhya, committed by Rostislav Pehlivanov

diracdec: Template DSP functions adding 10-bit versions

parent 95536898
This diff is collapsed.
......@@ -30,21 +30,21 @@ typedef short IDWTELEM;
#define MAX_DECOMPOSITIONS 8
typedef struct DWTCompose {
IDWTELEM *b[MAX_DWT_SUPPORT];
uint8_t *b[MAX_DWT_SUPPORT];
int y;
} DWTCompose;
struct DWTContext;
// Possible prototypes for vertical_compose functions
typedef void (*vertical_compose_2tap)(IDWTELEM *b0, IDWTELEM *b1, int width);
typedef void (*vertical_compose_3tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width);
typedef void (*vertical_compose_5tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width);
typedef void (*vertical_compose_9tap)(IDWTELEM *dst, IDWTELEM *b[8], int width);
typedef void (*vertical_compose_2tap)(uint8_t *b0, uint8_t *b1, int width);
typedef void (*vertical_compose_3tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, int width);
typedef void (*vertical_compose_5tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, uint8_t *b3, uint8_t *b4, int width);
typedef void (*vertical_compose_9tap)(uint8_t *dst, uint8_t *b[8], int width);
typedef struct DWTContext {
IDWTELEM *buffer;
IDWTELEM *temp;
uint8_t *buffer;
uint8_t *temp;
int width;
int height;
int stride;
......@@ -57,7 +57,7 @@ typedef struct DWTContext {
void (*vertical_compose_l1)(void);
void (*vertical_compose_h1)(void);
void (*vertical_compose)(void); ///< one set of lowpass and highpass combined
void (*horizontal_compose)(IDWTELEM *b, IDWTELEM *tmp, int width);
void (*horizontal_compose)(uint8_t *b, uint8_t *tmp, int width);
DWTCompose cs[MAX_DECOMPOSITIONS];
} DWTContext;
......@@ -76,9 +76,9 @@ enum dwt_type {
};
// -1 if an error occurred, e.g. the dwt_type isn't recognized
int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height,
int ff_spatial_idwt_init2(DWTContext *d, uint8_t *buffer, int width, int height,
int stride, enum dwt_type type, int decomposition_count,
IDWTELEM *temp);
uint8_t *temp, int bit_depth);
void ff_spatial_idwt_slice2(DWTContext *d, int y);
......
This diff is collapsed.
......@@ -1687,16 +1687,16 @@ static int dirac_decode_frame_internal(DiracContext *s)
memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height);
decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
}
ret = ff_spatial_idwt_init2(&d, (int16_t*)p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride >> 1,
s->wavelet_idx+2, s->wavelet_depth, (int16_t*)p->idwt_tmp);
ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp, s->bit_depth);
if (ret < 0)
return ret;
if (!s->num_refs) { /* intra */
for (y = 0; y < p->height; y += 16) {
ff_spatial_idwt_slice2(&d, y+16); /* decode */
s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
(int16_t*)(p->idwt_buf) + y*(p->idwt_stride >> 1), (p->idwt_stride >> 1), p->width, 16);
s->diracdsp.put_signed_rect_clamped[s->pshift](frame + y*p->stride, p->stride,
p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
}
} else { /* inter */
int rowheight = p->ybsep*p->stride;
......
......@@ -135,9 +135,10 @@ ADD_OBMC(8)
ADD_OBMC(16)
ADD_OBMC(32)
static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
static void put_signed_rect_clamped_8bit_c(uint8_t *dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
{
int x, y;
int16_t *src = (int16_t *)_src;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x+=4) {
dst[x ] = av_clip_uint8(src[x ] + 128);
......@@ -146,7 +147,24 @@ static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_
dst[x+3] = av_clip_uint8(src[x+3] + 128);
}
dst += dst_stride;
src += src_stride;
src += src_stride >> 1;
}
}
/**
 * Convert a rectangle of signed 32-bit coefficients to unsigned 10-bit samples.
 *
 * Each source value is biased by 512 and clamped to [0, 1023] before being
 * stored as a 16-bit sample.
 *
 * @param _dst       destination buffer, accessed as uint16_t samples
 * @param dst_stride destination line step; halved before being applied to the
 *                   uint16_t pointer (i.e. it is handled as a byte count)
 * @param _src       source buffer, accessed as int32_t coefficients (read-only)
 * @param src_stride source line step; quartered before being applied to the
 *                   int32_t pointer (i.e. it is handled as a byte count)
 * @param width      number of samples per row; rows are processed in groups of
 *                   four, so up to three extra samples past width are read and
 *                   written when width is not a multiple of 4
 * @param height     number of rows to convert
 */
static void put_signed_rect_clamped_10bit_c(uint8_t *_dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
{
    const int bias    = 1 << 9;         /* 512: recentres signed data */
    const int max_val = (1 << 10) - 1;  /* 1023: largest 10-bit sample */
    uint16_t *dst = (uint16_t *)_dst;
    /* Keep the const qualifier: the source is only ever read. */
    const int32_t *src = (const int32_t *)_src;
    int x, y;

    for (y = 0; y < height; y++) {
        /* Four samples per iteration, matching the original unrolling. */
        for (x = 0; x < width; x += 4) {
            dst[x    ] = av_clip(src[x    ] + bias, 0, max_val);
            dst[x + 1] = av_clip(src[x + 1] + bias, 0, max_val);
            dst[x + 2] = av_clip(src[x + 2] + bias, 0, max_val);
            dst[x + 3] = av_clip(src[x + 3] + bias, 0, max_val);
        }
        /* Convert the line steps to element counts for each pointer type. */
        dst += dst_stride >> 1;
        src += src_stride >> 2;
    }
}
......@@ -177,7 +195,8 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
{
c->dirac_hpel_filter = dirac_hpel_filter;
c->add_rect_clamped = add_rect_clamped_c;
c->put_signed_rect_clamped = put_signed_rect_clamped_c;
c->put_signed_rect_clamped[0] = put_signed_rect_clamped_8bit_c;
c->put_signed_rect_clamped[1] = put_signed_rect_clamped_10bit_c;
c->add_dirac_obmc[0] = add_obmc8_c;
c->add_dirac_obmc[1] = add_obmc16_c;
......
......@@ -41,8 +41,8 @@ typedef struct {
void (*put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
void (*avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
void (*put_signed_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*put_signed_rect_clamped[3])(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
......
......@@ -25,17 +25,20 @@
#include "dirac_dwt.h"
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
\
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
\
static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
\
for(i=width_align; i<width; i++) \
b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
......@@ -43,9 +46,12 @@ static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
} \
\
static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
\
for(i=width_align; i<width; i++) \
b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
......@@ -53,10 +59,15 @@ static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
} \
\
static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
IDWTELEM *b3, IDWTELEM *b4, int width) \
static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
uint8_t *_b3, uint8_t *_b4, int width) \
{ \
int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
int16_t *b3 = (int16_t *)_b3; \
int16_t *b4 = (int16_t *)_b4; \
\
for(i=width_align; i<width; i++) \
b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
......@@ -64,19 +75,26 @@ static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
IDWTELEM *b3, IDWTELEM *b4, int width) \
static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
uint8_t *_b3, uint8_t *_b4, int width) \
{ \
int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
int16_t *b3 = (int16_t *)_b3; \
int16_t *b4 = (int16_t *)_b4; \
\
for(i=width_align; i<width; i++) \
b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
} \
static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
{ \
int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
\
for(i=width_align; i<width; i++) { \
b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
......@@ -85,10 +103,13 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
\
ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\
int w2= w>>1;\
int x= w2 - (w2&(align-1));\
int16_t *b = (int16_t *)_b; \
int16_t *tmp = (int16_t *)_tmp; \
\
ff_horizontal_compose_haar0i##ext(b, tmp, w);\
\
for (; x < w2; x++) {\
......@@ -96,10 +117,13 @@ static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
}\
}\
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\
int w2= w>>1;\
int x= w2 - (w2&(align-1));\
int16_t *b = (int16_t *)_b; \
int16_t *tmp = (int16_t *)_tmp; \
\
ff_horizontal_compose_haar1i##ext(b, tmp, w);\
\
for (; x < w2; x++) {\
......@@ -116,12 +140,15 @@ COMPOSE_VERTICAL(_mmx, 4)
COMPOSE_VERTICAL(_sse2, 8)
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);
static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
{
int w2= w>>1;
int x= w2 - (w2&7);
int16_t *b = (int16_t *)_b;
int16_t *tmp = (int16_t *)_tmp;
ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
for (; x < w2; x++) {
......
......@@ -130,7 +130,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
c->dirac_hpel_filter = dirac_hpel_filter_mmx;
c->add_rect_clamped = ff_add_rect_clamped_mmx;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif
PIXFUNC(put, 0, mmx);
PIXFUNC(avg, 0, mmx);
......@@ -143,7 +143,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
if (EXTERNAL_SSE2(mm_flags)) {
c->dirac_hpel_filter = dirac_hpel_filter_sse2;
c->add_rect_clamped = ff_add_rect_clamped_sse2;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
......
......@@ -150,7 +150,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
%endif
.loopy:
lea src2q, [srcq+src_strideq*2]
lea src2q, [srcq+src_strideq]
lea dst2q, [dstq+dst_strideq]
.loopx:
sub wd, mmsize
......@@ -164,7 +164,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
mova [dst2q+wq], m2
jg .loopx
lea srcq, [srcq+src_strideq*4]
lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2]
sub hd, 2
mov wd, wspill
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment