/*
 * Copyright (C) 2010 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * rac_get_prob: decode one bit from the range coder using probability pr.
 *
 * All operands are registers:
 *   h    in:  current range value
 *        out: range for the decoded bit (not yet renormalised)
 *   bs   in/out: bit counter; a carry out of "bs + shift" triggers a
 *        16-bit refill, after which 16 is subtracted again
 *   buf  in/out: input byte pointer, advanced by 2 on refill
 *   cw   in/out: code word
 *   pr   in:  probability (used as the bottom halfword multiplier)
 *   t0   in:  renormalisation shift for h
 *        out: h shifted left by that amount
 *   t1   scratch, holds the refill halfword
 *
 * split = (t0 * pr + 256 - pr) >> 8 is compared against cw as split << 16.
 * Flags on exit come from that compare:
 *   ge: bit is 1, cw -= split << 16, h = t0 - split
 *   lt: bit is 0, h = split
 *
 * The A/T line prefixes are defined outside this file (asm.S); they
 * presumably select ARM-only and Thumb-only lines -- confirm there.
 */
.macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
        adds            \bs, \bs, \t0                   @ bits += shift, carry set means refill
        lsl             \cw, \cw, \t0                   @ renormalise code word
        lsl             \t0, \h,  \t0                   @ t0 = renormalised range
        rsb             \h,  \pr, #256                  @ h = 256 - pr
        it              cs
        ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, advance buf
        smlabb          \h,  \t0, \pr, \h               @ h = t0 * pr + 256 - pr
T       itttt           cs
        rev16cs         \t1, \t1                        @ swap the two bytes of the halfword
A       orrcs           \cw, \cw, \t1, lsl \bs
T       lslcs           \t1, \t1, \bs
T       orrcs           \cw, \cw, \t1                   @ merge new bits into code word
        subcs           \bs, \bs, #16                   @ account for the 16 bits just added
        lsr             \h,  \h,  #8                    @ h = split
        cmp             \cw, \h,  lsl #16               @ flags: ge => bit 1, lt => bit 0
        itt             ge
        subge           \cw, \cw, \h,  lsl #16
        subge           \h,  \t0, \h                    @ bit 1: range = t0 - split
.endm

/*
 * rac_get_128: decode one bit from the range coder with a fixed
 * probability constant of 128 (no probability operand).
 *
 * All operands are registers:
 *   h    in:  current range value
 *        out: range for the decoded bit (not yet renormalised)
 *   bs   in/out: bit counter; a carry out of "bs + shift" triggers a
 *        16-bit refill, after which 16 is subtracted again
 *   buf  in/out: input byte pointer, advanced by 2 on refill
 *   cw   in/out: code word
 *   t0   in:  renormalisation shift for h
 *        out: h shifted left by that amount
 *   t1   scratch, holds the refill halfword
 *
 * split = (128 + (t0 << 7)) >> 8 is compared against cw as split << 16.
 * Flags on exit come from that compare:
 *   ge: bit is 1, cw -= split << 16, h = t0 - split
 *   lt: bit is 0, h = split
 *
 * The A/T line prefixes are defined outside this file (asm.S); they
 * presumably select ARM-only and Thumb-only lines -- confirm there.
 */
.macro rac_get_128      h, bs, buf, cw, t0, t1
        adds            \bs, \bs, \t0                   @ bits += shift, carry set means refill
        lsl             \cw, \cw, \t0                   @ renormalise code word
        lsl             \t0, \h,  \t0                   @ t0 = renormalised range
        it              cs
        ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, advance buf
        mov             \h,  #128
        it              cs
        rev16cs         \t1, \t1                        @ swap the two bytes of the halfword
        add             \h,  \h,  \t0, lsl #7           @ h = 128 + t0 * 128
A       orrcs           \cw, \cw, \t1, lsl \bs
T       ittt            cs
T       lslcs           \t1, \t1, \bs
T       orrcs           \cw, \cw, \t1                   @ merge new bits into code word
        subcs           \bs, \bs, #16                   @ account for the 16 bits just added
        lsr             \h,  \h,  #8                    @ h = split
        cmp             \cw, \h,  lsl #16               @ flags: ge => bit 1, lt => bit 0
        itt             ge
        subge           \cw, \cw, \h,  lsl #16
        subge           \h,  \t0, \h                    @ bit 1: range = t0 - split
.endm

/*
 * ff_decode_block_coeffs_armv6
 *
 * Decodes coefficient tokens of one block with the range coder and stores
 * the dequantised 16-bit values through the zigzag_scan table.
 *
 * Arguments as used by the code below (the C prototype is not visible in
 * this file -- confirm the exact types against the caller):
 *   r0        pointer to range coder state: words at +0, +4, +8 are
 *             high, bits, buf; +16 is code_word; the word at +12 is an
 *             upper limit that buf is clamped to on exit
 *   r1        output block (halfword stores at byte offsets from zigzag_scan)
 *   r2        base of the probability table; rows are addressed as
 *             r2 + 33 * i + {0, 11, 22}
 *   r3        starting coefficient index i
 *   [sp, #0]  token_prob: probability row for the first token
 *   [sp, #4]  qmul: pointer to one 32-bit word holding two 16-bit factors
 * Returns:
 *   r0        coefficient index reached when decoding stopped
 *
 * Register roles inside the function:
 *   r3   coefficient index, incremented at the top of each iteration
 *   r4   current token_prob row (temporarily a category probability list)
 *   r5   high      r6   bits      r7   buf      r8   code_word
 *   r9   shift looked up from ff_vp56_norm_shift / scratch
 *   r10  scratch
 *   r11  packed dequant factors; the low halfword is the one applied.
 *        pkhtb copies the high halfword into the low one, so the low
 *        factor is only used for index 0.
 *   r12  coefficient magnitude, then signed and scaled value
 *   lr   address of ff_vp56_norm_shift
 *
 * The push stores 11 registers (44 bytes), so the stack arguments of the
 * caller start at [sp, #44]. The saved r0 and r1 stay at [sp] and [sp, #4]
 * and are reloaded from there.
 */
function ff_decode_block_coeffs_armv6, export=1
        push            {r0,r1,r4-r11,lr}
        movrelx         lr,  X(ff_vp56_norm_shift)
        ldrd            r4,  r5,  [sp, #44]             @ token_prob, qmul
        cmp             r3,  #0
        ldr             r11, [r5]
        ldm             r0,  {r5-r7}                    @ high, bits, buf
        it              ne
        pkhtbne         r11, r11, r11, asr #16
        ldr             r8,  [r0, #16]                  @ code_word
        @ Main loop: decode the token at index r3 (no end-of-block test here).
0:
        ldrb            r9,  [lr, r5]
        add             r3,  r3,  #1
        ldrb            r0,  [r4, #1]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        blt             2f                              @ bit 0: zero coefficient

        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4, #2]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             3f                              @ bit 1: magnitude above one

        @ Magnitude one: next row is r2 + 33 * r3 + 11, value is the low
        @ dequant factor. The sign bit is decoded with an inlined copy of
        @ the rac_get_128 sequence, interleaved with unrelated instructions.
        add             r4,  r3,  r3,  lsl #5
        sxth            r12, r11
        add             r4,  r4,  r2
        adds            r6,  r6,  r9
        add             r4,  r4,  #11
        lsl             r8,  r8,  r9
        it              cs
        ldrhcs          r10, [r7], #2
        lsl             r9,  r5,  r9
        mov             r5,  #128
        it              cs
        rev16cs         r10, r10
        add             r5,  r5,  r9,  lsl #7
T       ittt            cs
T       lslcs           r10, r10, r6
T       orrcs           r8,  r8,  r10
A       orrcs           r8,  r8,  r10, lsl r6
        subcs           r6,  r6,  #16
        lsr             r5,  r5,  #8
        cmp             r8,  r5,  lsl #16
        movrel          r10, zigzag_scan-1              @ r3 is already incremented, hence -1
        itt             ge
        subge           r8,  r8,  r5,  lsl #16
        subge           r5,  r9,  r5
        ldrb            r10, [r10, r3]
        it              ge
        rsbge           r12, r12, #0                    @ sign bit set: negate
        cmp             r3,  #16
        strh            r12, [r1, r10]
        bge             6f
        @ After a non-zero coefficient: test token_prob[0] to decide whether
        @ more tokens follow. pkhtb leaves the flags untouched.
5:
        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        pkhtb           r11, r11, r11, asr #16
        bge             0b

        @ Exit: clamp buf to the limit at +12, write the coder state back,
        @ drop the saved r0/r1 and return the coefficient index.
6:
        ldr             r0,  [sp]
        ldr             r9,  [r0, #12]
        cmp             r7,  r9
        it              hi
        movhi           r7,  r9
        stm             r0,  {r5-r7}                    @ high, bits, buf
        str             r8,  [r0, #16]                  @ code_word

        add             sp,  sp,  #8
        mov             r0,  r3
        pop             {r4-r11,pc}
        @ Zero coefficient: next row is r2 + 33 * r3; loop unless r3 == 16.
2:
        add             r4,  r3,  r3,  lsl #5
        cmp             r3,  #16
        add             r4,  r4,  r2
        pkhtb           r11, r11, r11, asr #16
        bne             0b
        b               6b
        @ Magnitude above one: walk the rest of the token tree.
3:
        ldrb            r0,  [r4, #3]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             1f

        @ Magnitudes 2..4, selected by token_prob[4] and token_prob[5].
        mov             r12, #2
        ldrb            r0,  [r4, #4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, #1
        ldrb            r9,  [lr, r5]
        blt             4f
        ldrb            r0,  [r4, #5]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
1:
        ldrb            r0,  [r4, #6]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             3f

        ldrb            r0,  [r4, #7]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             2f

        @ Magnitude 5 plus one extra bit read with constant probability 159.
        mov             r12, #5
        mov             r0,  #159
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
        @ Magnitude 7 plus two extra bits read with constant probabilities
        @ 165 (weight 2) and 145 (weight 1).
2:
        mov             r12, #7
        mov             r0,  #165
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #2
        ldrb            r9,  [lr, r5]
        mov             r0,  #145
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #1
        ldrb            r9,  [lr, r5]
        b               4f
        @ Larger categories: two bits (token_prob[8], then token_prob[9] or
        @ token_prob[10]) form an index 0..3 in r12. The base magnitude is
        @ (8 << index) + 3; the extra bits are read MSB first using the
        @ zero-terminated probability list ff_vp8_dct_cat_prob[index].
3:
        ldrb            r0,  [r4, #8]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r4,  r4,  #1
        ldrb            r9,  [lr, r5]
        ite             ge
        movge           r12, #2
        movlt           r12, #0
        ldrb            r0,  [r4, #9]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        mov             r9,  #8
        it              ge
        addge           r12, r12, #1
        movrelx         r4,  X(ff_vp8_dct_cat_prob), r1
        lsl             r9,  r9,  r12
        ldr             r4,  [r4, r12, lsl #2]
        add             r12, r9,  #3
        mov             r1,  #0                         @ r1 accumulates the extra bits
        ldrb            r0,  [r4], #1
1:
        ldrb            r9,  [lr, r5]
        lsl             r1,  r1,  #1
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r0,  [r4], #1
        it              ge
        addge           r1,  r1,  #1
        cmp             r0,  #0                         @ a zero probability ends the list
        bne             1b
        ldrb            r9,  [lr, r5]
        add             r12, r12, r1
        ldr             r1,  [sp, #4]                   @ restore the block pointer saved by push
        @ Common tail for magnitudes above one: next row is
        @ r2 + 33 * r3 + 22, then decode the sign, scale by the low dequant
        @ factor and store. smulbb leaves the flags untouched.
4:
        add             r4,  r3,  r3,  lsl #5
        add             r4,  r4,  r2
        add             r4,  r4,  #22
        rac_get_128     r5,  r6,  r7,  r8,  r9,  r10
        it              ge
        rsbge           r12, r12, #0                    @ sign bit set: negate
        smulbb          r12, r12, r11
        movrel          r9,  zigzag_scan-1              @ r3 is already incremented, hence -1
        ldrb            r9,  [r9, r3]
        cmp             r3,  #16
        strh            r12, [r1, r9]
        bge             6b
        b               5b
endfunc

/*
 * zigzag_scan: 16 byte-sized entries, one per coefficient in decode order.
 * ff_decode_block_coeffs_armv6 reads entry (index - 1) and uses it directly
 * as the byte offset of a halfword store into the output block, so each
 * entry is twice the position of the coefficient within the block.
 */
const zigzag_scan
        .byte            0,  2,  8, 16
        .byte           10,  4,  6, 12
        .byte           18, 24, 26, 20
        .byte           14, 22, 28, 30
endconst