me_cmp_mvi_asm.S 6.06 KB
Newer Older
1 2 3 4
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 21 22 23 24 25 26 27 28 29 30 31 32 33
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0
34

35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

Måns Rullgård's avatar
Måns Rullgård committed
58
        and     a2, 7, t0
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
        clr     v0
        beq     t0, $aligned
        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
Måns Rullgård's avatar
Måns Rullgård committed
83 84 85 86 87 88 89
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
90
        /* load line 1 */
Måns Rullgård's avatar
Måns Rullgård committed
91 92 93 94 95 96 97
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
98
        /* calc line 0 */
Måns Rullgård's avatar
Måns Rullgård committed
99 100 101
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
102
        or      t0, ta, t0      # left
Måns Rullgård's avatar
Måns Rullgård committed
103
        extqh   t2, a2, t2      # right hi
104 105 106 107 108 109
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error left
        /* calc line 1 */
Måns Rullgård's avatar
Måns Rullgård committed
110 111 112
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
113
        or      t5, ta, t5      # left
Måns Rullgård's avatar
Måns Rullgård committed
114
        extqh   t7, a2, t7      # right hi
115 116 117 118 119 120
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error left
        /* loop */
Måns Rullgård's avatar
Måns Rullgård committed
121 122
        subq    a4,  2, a4      # h -= 2
        bne     a4, $unaligned
123 124 125 126 127
        ret

        .align 4
$aligned:
        /* load line 0 */
Måns Rullgård's avatar
Måns Rullgård committed
128 129 130 131 132 133
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
134
        /* load line 1 */
Måns Rullgård's avatar
Måns Rullgård committed
135 136 137 138 139 140
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
141
        /* load line 2 */
Måns Rullgård's avatar
Måns Rullgård committed
142 143 144 145 146 147
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
148
        /* load line 3 */
Måns Rullgård's avatar
Måns Rullgård committed
149 150 151 152 153
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
154 155
        /* calc line 0 */
        perr    t0, t2, t0      # error left
Måns Rullgård's avatar
Måns Rullgård committed
156
        addq    a1, a3, a1      # pix1
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
Måns Rullgård's avatar
Måns Rullgård committed
172
        perr    td, a0, t1      # error right
173 174 175
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
Måns Rullgård's avatar
Måns Rullgård committed
176 177
        subq    a4,  4, a4      # h -= 4
        bne     a4, $aligned
178 179
        ret
        .end pix_abs16x16_mvi_asm