/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names for the scratch values used below.
   ta..td map onto t10, t11, t12 and the assembler temporary AT.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value.
   In pix_abs16x16_mvi_asm, a3 carries line_size and a4 carries the row
   counter for the whole function, and v0 is the running result, so tg,
   tf and th must not be used as scratch there.  Only te (a5) is used as
   a temporary by that function.  */
#define te a5
#define tf a4
#define tg a3
#define th v0

        /* AT is named explicitly in this file (as td and in the gprof
           hook), so the assembler must not use or warn about it.  */
        .set noat
        /* Keep the instruction order exactly as written.  */
        .set noreorder
        /* Target selection; the code below uses the MVI perr instruction.  */
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * NOTE(review): the prototype above is reconstructed from the register
 * usage in this function; the C type of the first argument is not
 * visible here -- confirm against the C declaration.
 *
 * Accumulates, for h rows of 16 bytes each, the perr (pixel error) of
 * pix1 against pix2 and returns the total.
 *
 * In:   a0 = first argument; never read, reused as scratch in the
 *            aligned loop
 *       a1 = pix1, always loaded with ldq (assumes 8-byte alignment)
 *       a2 = pix2, any alignment; its low three bits select the path
 *       a3 = line_size, added to both pointers after every row
 *            (presumably a multiple of 8 so that the alignment seen on
 *            the first row holds for all rows -- confirm with callers)
 *       a4 = h, number of rows.  Both loops terminate only when the
 *            counter reaches exactly zero, so h must be a positive
 *            multiple of 2 on the unaligned path and of 4 on the
 *            aligned path.
 * Out:  v0 = sum of all perr results
 * May clobber: t0-t12, AT, a0, a1, a2, a4, a5
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

#if CONFIG_GPROF
        /* Profiling hook, only assembled when CONFIG_GPROF is set.  */
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a2, 7, t0       # t0 = pix2 offset within a quadword
        clr     v0              # running sum = 0
        beq     t0, $aligned    # offset 0: use plain quadword loads
        .align 4
$unaligned:
        /* Two rows per iteration.  Each row of pix2 is fetched as three
           quadwords with ldq_u and the 16 wanted bytes are assembled
           with extql/extqh.

           Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1 += line_size
        addq    a2, a3, a2      # pix2 += line_size
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1 += line_size
        addq    a2, a3, a2      # pix2 += line_size
        /* calc line 0
           a2 has already been advanced past both rows at this point.
           The ext* instructions only use its low three bits as the byte
           offset, so this relies on the row advance leaving that offset
           unchanged.  */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop: exits only when the counter is exactly zero */
        subq    a4,  2, a4      # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* Four rows per iteration.  All loads for the four rows are
           issued before the first perr, which uses up every scratch
           register: t0-t9, ta-td, te (a5) and a0.  */

        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1 += line_size
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2 += line_size
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right (a0 is free: never read as an argument)
        /* calc line 0
           From here on t0/t1 are recycled as the left/right error
           temporaries; each add is placed one perr after the perr that
           produced its operand.  */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1 += line_size (for line 3)
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop: exits only when the counter is exactly zero */
        subq    a4,  4, a4      # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm