Commit 3f07f12f authored by Kieran Kunhya, committed by Rostislav Pehlivanov

diracdec: Template DSP functions adding 10-bit versions

parent 95536898
This diff is collapsed.
...@@ -30,21 +30,21 @@ typedef short IDWTELEM; ...@@ -30,21 +30,21 @@ typedef short IDWTELEM;
#define MAX_DECOMPOSITIONS 8 #define MAX_DECOMPOSITIONS 8
typedef struct DWTCompose { typedef struct DWTCompose {
IDWTELEM *b[MAX_DWT_SUPPORT]; uint8_t *b[MAX_DWT_SUPPORT];
int y; int y;
} DWTCompose; } DWTCompose;
struct DWTContext; struct DWTContext;
// Possible prototypes for vertical_compose functions // Possible prototypes for vertical_compose functions
typedef void (*vertical_compose_2tap)(IDWTELEM *b0, IDWTELEM *b1, int width); typedef void (*vertical_compose_2tap)(uint8_t *b0, uint8_t *b1, int width);
typedef void (*vertical_compose_3tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); typedef void (*vertical_compose_3tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, int width);
typedef void (*vertical_compose_5tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); typedef void (*vertical_compose_5tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, uint8_t *b3, uint8_t *b4, int width);
typedef void (*vertical_compose_9tap)(IDWTELEM *dst, IDWTELEM *b[8], int width); typedef void (*vertical_compose_9tap)(uint8_t *dst, uint8_t *b[8], int width);
typedef struct DWTContext { typedef struct DWTContext {
IDWTELEM *buffer; uint8_t *buffer;
IDWTELEM *temp; uint8_t *temp;
int width; int width;
int height; int height;
int stride; int stride;
...@@ -57,7 +57,7 @@ typedef struct DWTContext { ...@@ -57,7 +57,7 @@ typedef struct DWTContext {
void (*vertical_compose_l1)(void); void (*vertical_compose_l1)(void);
void (*vertical_compose_h1)(void); void (*vertical_compose_h1)(void);
void (*vertical_compose)(void); ///< one set of lowpass and highpass combined void (*vertical_compose)(void); ///< one set of lowpass and highpass combined
void (*horizontal_compose)(IDWTELEM *b, IDWTELEM *tmp, int width); void (*horizontal_compose)(uint8_t *b, uint8_t *tmp, int width);
DWTCompose cs[MAX_DECOMPOSITIONS]; DWTCompose cs[MAX_DECOMPOSITIONS];
} DWTContext; } DWTContext;
...@@ -76,9 +76,9 @@ enum dwt_type { ...@@ -76,9 +76,9 @@ enum dwt_type {
}; };
// -1 if an error occurred, e.g. the dwt_type isn't recognized // -1 if an error occurred, e.g. the dwt_type isn't recognized
int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height, int ff_spatial_idwt_init2(DWTContext *d, uint8_t *buffer, int width, int height,
int stride, enum dwt_type type, int decomposition_count, int stride, enum dwt_type type, int decomposition_count,
IDWTELEM *temp); uint8_t *temp, int bit_depth);
void ff_spatial_idwt_slice2(DWTContext *d, int y); void ff_spatial_idwt_slice2(DWTContext *d, int y);
......
This diff is collapsed.
...@@ -1687,16 +1687,16 @@ static int dirac_decode_frame_internal(DiracContext *s) ...@@ -1687,16 +1687,16 @@ static int dirac_decode_frame_internal(DiracContext *s)
memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height); memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height);
decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */ decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
} }
ret = ff_spatial_idwt_init2(&d, (int16_t*)p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride >> 1, ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
s->wavelet_idx+2, s->wavelet_depth, (int16_t*)p->idwt_tmp); s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp, s->bit_depth);
if (ret < 0) if (ret < 0)
return ret; return ret;
if (!s->num_refs) { /* intra */ if (!s->num_refs) { /* intra */
for (y = 0; y < p->height; y += 16) { for (y = 0; y < p->height; y += 16) {
ff_spatial_idwt_slice2(&d, y+16); /* decode */ ff_spatial_idwt_slice2(&d, y+16); /* decode */
s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride, s->diracdsp.put_signed_rect_clamped[s->pshift](frame + y*p->stride, p->stride,
(int16_t*)(p->idwt_buf) + y*(p->idwt_stride >> 1), (p->idwt_stride >> 1), p->width, 16); p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
} }
} else { /* inter */ } else { /* inter */
int rowheight = p->ybsep*p->stride; int rowheight = p->ybsep*p->stride;
......
...@@ -135,9 +135,10 @@ ADD_OBMC(8) ...@@ -135,9 +135,10 @@ ADD_OBMC(8)
ADD_OBMC(16) ADD_OBMC(16)
ADD_OBMC(32) ADD_OBMC(32)
static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height) static void put_signed_rect_clamped_8bit_c(uint8_t *dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
{ {
int x, y; int x, y;
int16_t *src = (int16_t *)_src;
for (y = 0; y < height; y++) { for (y = 0; y < height; y++) {
for (x = 0; x < width; x+=4) { for (x = 0; x < width; x+=4) {
dst[x ] = av_clip_uint8(src[x ] + 128); dst[x ] = av_clip_uint8(src[x ] + 128);
...@@ -146,7 +147,24 @@ static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_ ...@@ -146,7 +147,24 @@ static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_
dst[x+3] = av_clip_uint8(src[x+3] + 128); dst[x+3] = av_clip_uint8(src[x+3] + 128);
} }
dst += dst_stride; dst += dst_stride;
src += src_stride; src += src_stride >> 1;
}
}
static void put_signed_rect_clamped_10bit_c(uint8_t *_dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
{
int x, y;
uint16_t *dst = (uint16_t *)_dst;
int32_t *src = (int32_t *)_src;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x+=4) {
dst[x ] = av_clip(src[x ] + 512, 0, (1 << 10) - 1);
dst[x+1] = av_clip(src[x+1] + 512, 0, (1 << 10) - 1);
dst[x+2] = av_clip(src[x+2] + 512, 0, (1 << 10) - 1);
dst[x+3] = av_clip(src[x+3] + 512, 0, (1 << 10) - 1);
}
dst += dst_stride >> 1;
src += src_stride >> 2;
} }
} }
...@@ -177,7 +195,8 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c) ...@@ -177,7 +195,8 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
{ {
c->dirac_hpel_filter = dirac_hpel_filter; c->dirac_hpel_filter = dirac_hpel_filter;
c->add_rect_clamped = add_rect_clamped_c; c->add_rect_clamped = add_rect_clamped_c;
c->put_signed_rect_clamped = put_signed_rect_clamped_c; c->put_signed_rect_clamped[0] = put_signed_rect_clamped_8bit_c;
c->put_signed_rect_clamped[1] = put_signed_rect_clamped_10bit_c;
c->add_dirac_obmc[0] = add_obmc8_c; c->add_dirac_obmc[0] = add_obmc8_c;
c->add_dirac_obmc[1] = add_obmc16_c; c->add_dirac_obmc[1] = add_obmc16_c;
......
...@@ -41,8 +41,8 @@ typedef struct { ...@@ -41,8 +41,8 @@ typedef struct {
void (*put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h); void (*put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
void (*avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h); void (*avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
void (*put_signed_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/); void (*put_signed_rect_clamped[3])(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/); void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/); void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
......
...@@ -25,17 +25,20 @@ ...@@ -25,17 +25,20 @@
#include "dirac_dwt.h" #include "dirac_dwt.h"
#define COMPOSE_VERTICAL(ext, align) \ #define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \ void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \ void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \ void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
\ \
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \ static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \ { \
int i, width_align = width&~(align-1); \ int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
\ \
for(i=width_align; i<width; i++) \ for(i=width_align; i<width; i++) \
b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \ b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
...@@ -43,9 +46,12 @@ static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ...@@ -43,9 +46,12 @@ static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \ ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
} \ } \
\ \
static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \ static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \ { \
int i, width_align = width&~(align-1); \ int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
\ \
for(i=width_align; i<width; i++) \ for(i=width_align; i<width; i++) \
b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \ b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
...@@ -53,10 +59,15 @@ static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELE ...@@ -53,10 +59,15 @@ static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \ ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
} \ } \
\ \
static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \ static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
IDWTELEM *b3, IDWTELEM *b4, int width) \ uint8_t *_b3, uint8_t *_b4, int width) \
{ \ { \
int i, width_align = width&~(align-1); \ int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
int16_t *b3 = (int16_t *)_b3; \
int16_t *b4 = (int16_t *)_b4; \
\ \
for(i=width_align; i<width; i++) \ for(i=width_align; i<width; i++) \
b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
...@@ -64,19 +75,26 @@ static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM ...@@ -64,19 +75,26 @@ static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \ ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
} \ } \
\ \
static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \ static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
IDWTELEM *b3, IDWTELEM *b4, int width) \ uint8_t *_b3, uint8_t *_b4, int width) \
{ \ { \
int i, width_align = width&~(align-1); \ int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
int16_t *b2 = (int16_t *)_b2; \
int16_t *b3 = (int16_t *)_b3; \
int16_t *b4 = (int16_t *)_b4; \
\ \
for(i=width_align; i<width; i++) \ for(i=width_align; i<width; i++) \
b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\ \
ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \ ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
} \ } \
static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \ static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
{ \ { \
int i, width_align = width&~(align-1); \ int i, width_align = width&~(align-1); \
int16_t *b0 = (int16_t *)_b0; \
int16_t *b1 = (int16_t *)_b1; \
\ \
for(i=width_align; i<width; i++) { \ for(i=width_align; i<width; i++) { \
b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \ b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
...@@ -85,10 +103,13 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \ ...@@ -85,10 +103,13 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
\ \
ff_vertical_compose_haar##ext(b0, b1, width_align); \ ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \ } \
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\ {\
int w2= w>>1;\ int w2= w>>1;\
int x= w2 - (w2&(align-1));\ int x= w2 - (w2&(align-1));\
int16_t *b = (int16_t *)_b; \
int16_t *tmp = (int16_t *)_tmp; \
\
ff_horizontal_compose_haar0i##ext(b, tmp, w);\ ff_horizontal_compose_haar0i##ext(b, tmp, w);\
\ \
for (; x < w2; x++) {\ for (; x < w2; x++) {\
...@@ -96,10 +117,13 @@ static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ ...@@ -96,10 +117,13 @@ static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\ b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
}\ }\
}\ }\
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\ {\
int w2= w>>1;\ int w2= w>>1;\
int x= w2 - (w2&(align-1));\ int x= w2 - (w2&(align-1));\
int16_t *b = (int16_t *)_b; \
int16_t *tmp = (int16_t *)_tmp; \
\
ff_horizontal_compose_haar1i##ext(b, tmp, w);\ ff_horizontal_compose_haar1i##ext(b, tmp, w);\
\ \
for (; x < w2; x++) {\ for (; x < w2; x++) {\
...@@ -116,12 +140,15 @@ COMPOSE_VERTICAL(_mmx, 4) ...@@ -116,12 +140,15 @@ COMPOSE_VERTICAL(_mmx, 4)
COMPOSE_VERTICAL(_sse2, 8) COMPOSE_VERTICAL(_sse2, 8)
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w); void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);
static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w) static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
{ {
int w2= w>>1; int w2= w>>1;
int x= w2 - (w2&7); int x= w2 - (w2&7);
int16_t *b = (int16_t *)_b;
int16_t *tmp = (int16_t *)_tmp;
ff_horizontal_compose_dd97i_ssse3(b, tmp, w); ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
for (; x < w2; x++) { for (; x < w2; x++) {
......
...@@ -130,7 +130,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c) ...@@ -130,7 +130,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
c->dirac_hpel_filter = dirac_hpel_filter_mmx; c->dirac_hpel_filter = dirac_hpel_filter_mmx;
c->add_rect_clamped = ff_add_rect_clamped_mmx; c->add_rect_clamped = ff_add_rect_clamped_mmx;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx; c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
#endif #endif
PIXFUNC(put, 0, mmx); PIXFUNC(put, 0, mmx);
PIXFUNC(avg, 0, mmx); PIXFUNC(avg, 0, mmx);
...@@ -143,7 +143,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c) ...@@ -143,7 +143,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
c->dirac_hpel_filter = dirac_hpel_filter_sse2; c->dirac_hpel_filter = dirac_hpel_filter_sse2;
c->add_rect_clamped = ff_add_rect_clamped_sse2; c->add_rect_clamped = ff_add_rect_clamped_sse2;
c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2; c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
......
...@@ -150,7 +150,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, ...@@ -150,7 +150,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
%endif %endif
.loopy: .loopy:
lea src2q, [srcq+src_strideq*2] lea src2q, [srcq+src_strideq]
lea dst2q, [dstq+dst_strideq] lea dst2q, [dstq+dst_strideq]
.loopx: .loopx:
sub wd, mmsize sub wd, mmsize
...@@ -164,7 +164,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, ...@@ -164,7 +164,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
mova [dst2q+wq], m2 mova [dst2q+wq], m2
jg .loopx jg .loopx
lea srcq, [srcq+src_strideq*4] lea srcq, [srcq+src_strideq*2]
lea dstq, [dstq+dst_strideq*2] lea dstq, [dstq+dst_strideq*2]
sub hd, 2 sub hd, 2
mov wd, wspill mov wd, wspill
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment