/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * Register aliases used by the macros and the function in this file.
 * RESULT and BUF both name a1: the function copies BUF into PTR before
 * scanning and only writes RESULT on its exit paths (as PTR - BUF).
 * v1-v6 and lr are pushed on entry to the function and popped on return.
 */
RESULT  .req    a1      @ return value: byte offset from the start of the buffer
BUF     .req    a1      @ first argument: start of the buffer
SIZE    .req    a2      @ second argument: byte count, decremented while scanning
PATTERN .req    a3      @ mask constant, loaded with 0x80008000 by the function
PTR     .req    a4      @ current read position
DAT0    .req    v1      @ DAT0-DAT3: data loaded from the buffer
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
TMP0    .req    v5      @ TMP0-TMP3: per-word test results for DAT0-DAT3
TMP1    .req    v6
TMP2    .req    ip
TMP3    .req    lr

/*
 * Preload look-ahead in units of 32 bytes: innerloop16 issues its pld at
 * PTR + PRELOAD_DISTANCE*32, and the function sizes its preloading loop
 * from the same constant.
 */
#define PRELOAD_DISTANCE 4

/*
 * innerloop4
 *
 * Loads one word from PTR (post-incrementing it by 4), reduces SIZE by 4
 * and computes
 *     TMP0 = (DAT0 - (PATTERN >> 14)) & ~DAT0 & PATTERN
 * With PATTERN = 0x80008000 (as loaded by the function) the subtrahend is
 * 0x00020002, and TMP0 is nonzero exactly when at least one 16-bit half of
 * DAT0 holds the value 0 or 1.
 *
 * On exit: Z is clear (ne) when the word was flagged; C still reflects the
 * SIZE subtraction (clear once SIZE has gone below zero).
 */
.macro innerloop4
        ldr     DAT0, [PTR], #4
        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
        sub     TMP0, DAT0, PATTERN, lsr #14    @ wraps in any halfword below 2
        bic     TMP0, TMP0, DAT0                @ discard halves whose top bit was already set
        ands    TMP0, TMP0, PATTERN             @ keep bit 15 of each half; sets Z, leaves C alone
.endm

/*
 * innerloop16 [decrement][, do_preload]
 *
 * Loads four words from PTR (post-incrementing it by 16) into DAT0-DAT3 and
 * applies the same halfword test as innerloop4 to each of them.
 *   decrement  - if non-empty, SIZE is reduced by this many bytes with the
 *                flags set; C then reflects that subtraction on exit
 *   do_preload - if non-empty, a pld is issued PRELOAD_DISTANCE*32 bytes
 *                beyond the already advanced PTR
 *
 * On exit Z is clear (ne) when any of the four words was flagged.  TMP0 is
 * always fully masked; TMP1, TMP2 and TMP3 are masked only while every
 * earlier word tested clean, so a caller must examine them in the order
 * TMP0, TMP1, TMP2 and treat TMP3 as the remaining case.
 */
.macro innerloop16  decrement, do_preload
        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
 .ifnc "\do_preload",""
        pld     [PTR, #PRELOAD_DISTANCE*32]
 .endif
 .ifnc "\decrement",""
        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
 .endif
        @ The four tests are interleaved; each TMPn ends up as
        @ (DATn - (PATTERN >> 14)) & ~DATn, masked with PATTERN below.
        sub     TMP0, DAT0, PATTERN, lsr #14
        sub     TMP1, DAT1, PATTERN, lsr #14
        bic     TMP0, TMP0, DAT0
        bic     TMP1, TMP1, DAT1
        sub     TMP2, DAT2, PATTERN, lsr #14
        sub     TMP3, DAT3, PATTERN, lsr #14
        ands    TMP0, TMP0, PATTERN     @ Z set if the first word is clean
        bic     TMP2, TMP2, DAT2
        it      eq
        andseq  TMP1, TMP1, PATTERN     @ only evaluated while nothing is flagged yet
        bic     TMP3, TMP3, DAT3
        itt     eq
        andseq  TMP2, TMP2, PATTERN     @ each andseq updates Z for the one after it
        andseq  TMP3, TMP3, PATTERN
.endm

/*
 * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
 *
 * In:    a1 = buf, a2 = size
 * Out:   a1 = byte offset from buf of the first candidate found, or the
 *             offset just past the last byte examined (PTR - BUF) when
 *             nothing was flagged; 0 when size is 0
 * Saves: v1-v6 and lr are pushed on entry and popped on return
 * Uses:  a2-a4 and ip as scratch; the flags are not preserved
 *
 * Buffers shorter than (PRELOAD_DISTANCE+3)*32 - 1 bytes (compared as
 * unsigned) are scanned one byte at a time for a zero byte.  Longer buffers
 * are first brought to word alignment bytewise, then scanned one or four
 * words at a time with big-endian loads (setend be) using the halfword test
 * of innerloop4 / innerloop16.  A flagged word is narrowed down to a byte
 * offset at labels 91 and 93.  Every exit taken after "setend be" passes
 * through "setend le" before returning.
 */
function ff_startcode_find_candidate_armv6, export=1
        push    {v1-v6,lr}
        mov     PTR, BUF                @ a1 doubles as RESULT, so scan with PTR
        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
        @ before using code that does preloads
        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
        blo     60f

        @ Get to word-alignment, 1 byte at a time
        tst     PTR, #3
        beq     2f
1:      ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     PTR, #3
        bne     1b
2:      @ Get to 4-word alignment, 1 word at a time
        ldr     PATTERN, =0x80008000
        setend  be                      @ word loads now place the first byte in bits 31:24
        tst     PTR, #12
        beq     4f
3:      innerloop4
        bne     91f
        tst     PTR, #12
        bne     3b
4:      @ Get to cacheline (8-word) alignment
        tst     PTR, #16
        beq     5f
        innerloop16  16
        bne     93f
5:      @ Check complete cachelines, with preloading
        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
        @ complete cachelines to go
        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
6:      innerloop16  , do_preload
        bne     93f
        innerloop16  32
        bne     93f
        bcs     6b                      @ C comes from the subs #32 in the second macro
        @ Preload trailing part-cacheline, if any
        tst     SIZE, #31
        beq     7f
        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
        @ Check remaining data without doing any more preloads. First
        @ do in chunks of 4 words:
7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
        bmi     9f
8:      innerloop16  16
        bne     93f
        bcs     8b
        @ Then in words:
9:      adds    SIZE, SIZE, #16 - 4
        bmi     11f
10:     innerloop4
        bne     91f
        bcs     10b
11:     setend  le
        @ Check second byte of final halfword
        ldrb    DAT0, [PTR, #-1]
        teq     DAT0, #0
        beq     90f
        @ Check any remaining bytes
        tst     SIZE, #3
        beq     13f
12:     ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     SIZE, #3
        bne     12b
        @ No candidate found
13:     sub     RESULT, PTR, BUF
        b       99f

60:     @ Small buffer - simply check by looping over bytes
        subs    SIZE, SIZE, #1
        bcc     62f                     @ size == 0: return offset 0 rather than
                                        @ leaving the buf pointer in a1
61:     ldrb    DAT0, [PTR], #1
        subs    SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        bcs     61b                     @ C is still the borrow state of the subs above
        @ No candidate found
62:     sub     RESULT, PTR, BUF
        b       99f

90:     @ Found a candidate at the preceding byte
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #1
        b       99f

91:     @ Found a candidate somewhere in the preceding 4 bytes
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #4      @ offset of the word just tested
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0        @ N set (mi) if the hit is in bits 31:16
        itt     pl
        ldrbpl  DAT0, [PTR, #-3]        @ hit in bits 15:0: fetch the byte before that half
        addpl   RESULT, RESULT, #2
        bpl     92f
        teq     RESULT, #0
        beq     98f @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-5]        @ byte immediately before the word
92:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1      @ preceding byte is zero: report it instead
        b       98f

93:     @ Found a candidate somewhere in the preceding 16 bytes
        @ TMP0-TMP2 are examined in order; see innerloop16 for why that matters
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #16     @ offset of the first of the four words
        teq     TMP0, #0
        beq     95f @ not in first 4 bytes
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0
        itt     pl
        ldrbpl  DAT0, [PTR, #-15]
        addpl   RESULT, RESULT, #2
        bpl     94f
        teq     RESULT, #0
        beq     98f @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-17]
94:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
95:     add     RESULT, RESULT, #4
        teq     TMP1, #0
        beq     96f @ not in next 4 bytes
        sub     TMP1, DAT1, #0x20000
        bics    TMP1, TMP1, DAT1
        itee    mi
        ldrbmi  DAT0, [PTR, #-13]       @ byte before the second word
        ldrbpl  DAT0, [PTR, #-11]       @ byte before its second halfword
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
96:     add     RESULT, RESULT, #4
        teq     TMP2, #0
        beq     97f @ not in next 4 bytes
        sub     TMP2, DAT2, #0x20000
        bics    TMP2, TMP2, DAT2
        itee    mi
        ldrbmi  DAT0, [PTR, #-9]
        ldrbpl  DAT0, [PTR, #-7]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
97:     add     RESULT, RESULT, #4      @ by elimination the hit is in the last word
        sub     TMP3, DAT3, #0x20000
        bics    TMP3, TMP3, DAT3
        itee    mi
        ldrbmi  DAT0, [PTR, #-5]
        ldrbpl  DAT0, [PTR, #-3]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        @ drop through to 98f
98:     setend  le
99:     pop     {v1-v6,pc}
endfunc

        @ Release every register alias created with .req at the top of this file
        .unreq  RESULT
        .unreq  BUF
        .unreq  SIZE
        .unreq  PATTERN
        .unreq  PTR
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  TMP0
        .unreq  TMP1
        .unreq  TMP2
        .unreq  TMP3