Commit 58dabf7b authored by Reimar Döffinger

Fix png decoding on x86.

Line sizes are only 8-byte aligned, so use unaligned loads
for add_bytes_l2 pointers.
Increasing the alignment requirement to 16 seemed a bit extreme
(png may be used for rather small sizes).
Also fix a mov that had its arguments swapped, leading to
add_bytes_l2 being applied on up to 8 bytes too few.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
parent da1ba4e8
...@@ -26,8 +26,8 @@ ...@@ -26,8 +26,8 @@
typedef struct PNGDSPContext { typedef struct PNGDSPContext {
void (*add_bytes_l2)(uint8_t *dst /* align 16 */, void (*add_bytes_l2)(uint8_t *dst /* align 16 */,
uint8_t *src1 /* align 16 */, uint8_t *src1,
uint8_t *src2 /* align 16 */, int w); uint8_t *src2, int w);
/* this might write to dst[w] */ /* this might write to dst[w] */
void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src, void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src,
......
...@@ -43,12 +43,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i ...@@ -43,12 +43,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
and waq, ~(mmsize*2-1) and waq, ~(mmsize*2-1)
jmp .end_v jmp .end_v
.loop_v: .loop_v:
mova m0, [src1q+iq] movu m0, [src2q+iq]
mova m1, [src1q+iq+mmsize] movu m1, [src2q+iq+mmsize]
paddb m0, [src2q+iq] paddb m0, [src1q+iq]
paddb m1, [src2q+iq+mmsize] paddb m1, [src1q+iq+mmsize]
mova [dstq+iq ], m0 movu [dstq+iq ], m0
mova [dstq+iq+mmsize], m1 movu [dstq+iq+mmsize], m1
add iq, mmsize*2 add iq, mmsize*2
.end_v: .end_v:
cmp iq, waq cmp iq, waq
...@@ -56,12 +56,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i ...@@ -56,12 +56,12 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
%if mmsize == 16 %if mmsize == 16
; vector loop ; vector loop
mov wq, waq mov waq, wq
and waq, ~7 and waq, ~7
jmp .end_l jmp .end_l
.loop_l: .loop_l:
movq mm0, [src1q+iq] movq mm0, [src2q+iq]
paddb mm0, [src2q+iq] paddb mm0, [src1q+iq]
movq [dstq+iq ], mm0 movq [dstq+iq ], mm0
add iq, 8 add iq, 8
.end_l: .end_l:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment