hpeldsp_armv6.S 8.56 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}
26
        bl              X(ff_\type\()_pixels8\subp\()_armv6)
27 28 29
        pop             {r0-r3, lr}
        add             r0,  r0,  #8
        add             r1,  r1,  #8
30
        b               X(ff_\type\()_pixels8\subp\()_armv6)
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
endfunc
.endm

call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd

function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr_post        r4,  r1,  r2
        strd            r6,  r7,  [r0, #8]
        ldr             r9,  [r1, #4]
        strd_post       r4,  r5,  r0,  r2
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr_post        r8,  r1,  r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]
        ldr_post        r4,  r1,  r2
        ldr             r7,  [r1, #4]
        strd_post       r4,  r5,  r0,  r2
        ldr_post        r6,  r1,  r2
        subs            r3,  r3,  #2
        strd_post       r6,  r7,  r0,  r2
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc

function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16
1:
        ldr             r4,  [r1]
        subs            r3,  r3,  #2
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        lsr             r6,  r4,  #8
        ldr_pre         r8,  r1,  r2
        orr             r6,  r6,  r5,  lsl #24
        ldr             r9,  [r1, #4]
        ldr             r11, [r1, #5]
        lsr             r10, r8,  #8
        add             r1,  r1,  r2
        orr             r10, r10, r9,  lsl #24
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12
        and             r6,  r6,  r12
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  r14
        strd_post       r4,  r5,  r0,  r2
        uadd8           r9,  r9,  r6
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11, pc}
endfunc

function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
135
        ldr_pre         r4,  r1,  r2
136 137 138
        uadd8           r8,  r8,  r10
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
139
        ldr             r5,  [r1, #4]
140 141 142 143 144 145 146
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
147
        ldrc_pre        ne,  r6,  r1,  r2
148 149
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2
150 151
        it              ne
        ldrne           r7,  [r1, #4]
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
        strd_post       r10, r11, r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        ldr_pre         r8,  r1,  r2
        ldr             r9,  [r1, #4]
        ldr             r14, [r1, #5]
        add             r1,  r1,  r2
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2
        uhadd8          r8,  r4,  r6
196
        ldr_pre         r4,  r1,  r2
197
        uhadd8          r9,  r5,  r7
198
        ldr             r5,  [r1, #4]
199
        uhadd8          r12, r4,  r6
200
        ldrc_pre        ne,  r6,  r1,  r2
201
        uhadd8          r14, r5,  r7
202 203
        it              ne
        ldrne           r7,  [r1, #4]
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16
        ldrd            r4,  r5,  [r0]
        ldr             r10, [r1, #4]
        ldr_post        r9,  r1,  r2
        subs            r3,  r3,  #2
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd_reg        r6,  r7,  r0,  r2
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        uadd8           r4,  r4,  r8
        ldr_post        r9,  r1,  r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd_post       r4,  r5,  r0,  r2
        uhadd8          r7,  r7,  r10
        beq             2f
        and             r8,  r8,  lr
        ldrd_reg        r4,  r5,  r0,  r2
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr_post        r9,  r1,  r2
        strd_post       r6,  r7,  r0,  r2
        b               1b
2:
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd_post       r6,  r7,  r0,  r2

        pop             {r4-r10, pc}
endfunc