/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
  IDCT coefficients, Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5.
  NOTE(review): the formula yields 16384 for i = 4 while W4 is 16383 here;
  presumably deliberate -- confirm against the C reference before changing.
*/
#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11    /* shift/rounding used for the row pass */
#define COL_SHIFT 20    /* shift/rounding used for the column pass */

/*
  Packed coefficient pairs for the dual 16-bit multiply instructions:
  Wxy holds Wx in the low halfword and Wy in the high halfword.
  W42n is the pair (-W4, -W2); the low halfword is masked to 16 bits so
  that its sign extension does not spill into the high halfword.
*/
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W42 (W4 | (W2 << 16))
#define W42n ((-W4 & 0xffff) | (-W2 << 16))
#define W46 (W4 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

/*
  Compute partial IDCT of single row.
  shift = shift amount that the finishing macro applies afterwards; the
          A accumulators are seeded here with the rounding bias
          1 << (shift-1)
  r0 = source address
  r2 = row[2,0] <= 2 cycles
  r3 = row[3,1]
  ip = w42      <= 2 cycles

  Output in registers r4--r11:
    r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
  Clobbers r1, r2, r3, ip, lr
*/
        .macro idct_row shift
        ldr    lr, =W46              /* lr  = W4 | (W6 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, =W13              /* ip  = W1 | (W3 << 16) */
        ldr    r10,=W57              /* r10 = W5 | (W7 << 16) */
        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */

        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        ldr    lr, [r0, #12]         /* lr  =  row[7,5] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smlad  r8, lr, r10,r8        /* B0  +=      W5*row[5] + W7*row[7] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */

        ldr    r3, =W42n             /* r3 =  -W4 | (-W2 << 16) */
        smlad  r10,lr, r2, r10       /* B2 +=  W7*row[5] + W3*row[7] */
        ldr    r2, [r0, #4]          /* r2 =   row[6,4] */
        smlsdx r11,lr, ip, r11       /* B3 +=  W3*row[5] - W1*row[7] */
        ldr    ip, =W46              /* ip =   W4 | (W6 << 16) */
        smlad  r9, lr, r1, r9        /* B1 -=  W1*row[5] + W5*row[7] */

        smlad  r5, r2, r3, r5        /* A1 += -W4*row[4] - W2*row[6] */
        smlsd  r6, r2, r3, r6        /* A2 += -W4*row[4] + W2*row[6] */
        smlad  r4, r2, ip, r4        /* A0 +=  W4*row[4] + W6*row[6] */
        smlsd  r7, r2, ip, r7        /* A3 +=  W4*row[4] - W6*row[6] */
        .endm

/*
  Compute partial IDCT of half row, i.e. using row[0..3] only; no memory
  is read by this macro.
  shift = shift amount that the finishing macro applies afterwards; the
          A accumulators are seeded here with the rounding bias
          1 << (shift-1)
  r2 = row[2,0]
  r3 = row[3,1]
  ip = w42

  Output in registers r4--r11:
    r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
  Clobbers r1, r2, ip, lr
*/
        .macro idct_row4 shift
        ldr    lr, =W46              /* lr =  W4 | (W6 << 16) */
        ldr    r10,=W57              /* r10 = W5 | (W7 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, =W13              /* ip =  W1 | (W3 << 16) */
        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
        .endm

/*
  Compute final part of IDCT single row without shift.
  Input in registers r4--r11
    (r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3)
  Output in registers ip, r4--r6, lr, r8--r10
    (r7 and r11 are left unchanged)
*/
        .macro idct_finish
        add    ip, r4, r8            /* ip  = A0 + B0 */
        sub    lr, r4, r8            /* lr  = A0 - B0 */
        sub    r4, r5, r9            /* r4  = A1 + B1 (r9 holds -B1) */
        add    r8, r5, r9            /* r8  = A1 - B1 */
        add    r5, r6, r10           /* r5  = A2 + B2 */
        sub    r9, r6, r10           /* r9  = A2 - B2 */
        add    r6, r7, r11           /* r6  = A3 + B3 */
        sub    r10,r7, r11           /* r10 = A3 - B3 */
        .endm

/*
  Compute final part of IDCT single row.
  shift = right-shift amount (arithmetic shift)
  Input/output in registers r4--r11
    in:  r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
    out: r4--r7  = (A0+B0, A1+B1, A2+B2, A3+B3) >> shift
         r8--r11 = (A0-B0, A1-B1, A2-B2, A3-B3) >> shift
  Clobbers r2, r3
*/
        .macro idct_finish_shift shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    r2, r4, r8            /* r2 = A0 - B0 */
        mov    r4, r3, asr #\shift
        mov    r8, r2, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    r2, r5, r9            /* r2 = A1 - B1 */
        mov    r5, r3, asr #\shift
        mov    r9, r2, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    r2, r6, r10           /* r2 = A2 - B2 */
        mov    r6, r3, asr #\shift
        mov    r10,r2, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    r2, r7, r11           /* r2 = A3 - B3 */
        mov    r7, r3, asr #\shift
        mov    r11,r2, asr #\shift
        .endm

/*
  Compute final part of IDCT single row, saturating results at 8 bits.
  shift = right-shift amount (arithmetic shift, applied before saturation)
  Input/output in registers r4--r11
    in:  r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
    out: r4--r7  = usat8((A0+B0, A1+B1, A2+B2, A3+B3) >> shift)
         r8--r11 = usat8((A0-B0, A1-B1, A2-B2, A3-B3) >> shift)
    where usat8 clamps to the unsigned range 0..255
  Clobbers r3, ip
*/
        .macro idct_finish_shift_sat shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    ip, r4, r8            /* ip = A0 - B0 */
        usat   r4, #8, r3, asr #\shift
        usat   r8, #8, ip, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    ip, r5, r9            /* ip = A1 - B1 */
        usat   r5, #8, r3, asr #\shift
        usat   r9, #8, ip, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    ip, r6, r10           /* ip = A2 - B2 */
        usat   r6, #8, r3, asr #\shift
        usat   r10,#8, ip, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    ip, r7, r11           /* ip = A3 - B3 */
        usat   r7, #8, r3, asr #\shift
        usat   r11,#8, ip, asr #\shift
        .endm

/*
  Compute IDCT of single row, storing as column.
  r0 = source (four words are read: row[2,0], row[6,4], row[3,1], row[7,5])
  r1 = dest   (eight halfwords are written, 16 bytes apart)

  r0 and r1 are preserved.  r2, r3 and ip are always clobbered; r4--r11
  are clobbered unless the DC-only shortcut is taken.
*/
function idct_row_armv6
        push   {lr}

        ldr    lr, [r0, #12]         /* lr = row[7,5] */
        ldr    ip, [r0, #4]          /* ip = row[6,4] */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        ldr    r2, [r0]              /* r2 = row[2,0] */
        orrs   lr, lr, ip            /* lr = row[7,5] | row[6,4]; Z set if all four are 0 */
        itt    eq
        cmpeq  lr, r3                /* ... and row[3,1] == 0 ? */
        cmpeq  lr, r2, lsr #16       /* ... and row[2]   == 0 ? */
        beq    1f                    /* only row[0] may be non-zero: DC shortcut */
        push   {r1}                  /* idct_row/idct_row4 clobber r1 */
        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
        cmp    lr, #0                /* row[4..7] all zero? */
        beq    2f                    /* yes: half-row variant is enough */

        idct_row   ROW_SHIFT
        b      3f

2:      idct_row4  ROW_SHIFT

3:      pop    {r1}
        idct_finish_shift ROW_SHIFT

        /* r4--r7 go to the even 16-byte slots, r11--r8 to the odd ones */
        strh   r4, [r1]
        strh   r5, [r1, #(16*2)]
        strh   r6, [r1, #(16*4)]
        strh   r7, [r1, #(16*6)]
        strh   r11,[r1, #(16*1)]
        strh   r10,[r1, #(16*3)]
        strh   r9, [r1, #(16*5)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}

        /* DC shortcut: row[2] is zero here, so r2 = row[0] (zero-extended);
           all eight outputs are the low 16 bits of row[0] << 3 */
1:      mov    r2, r2, lsl #3
        strh   r2, [r1]
        strh   r2, [r1, #(16*2)]
        strh   r2, [r1, #(16*4)]
        strh   r2, [r1, #(16*6)]
        strh   r2, [r1, #(16*1)]
        strh   r2, [r1, #(16*3)]
        strh   r2, [r1, #(16*5)]
        strh   r2, [r1, #(16*7)]
        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row.
  r0 = source (read as a row: row[2,0], row[6,4], row[3,1], row[7,5])
  r1 = dest   (eight halfwords are written, 16 bytes apart)

  r0 and r1 are preserved; r2--r11 and ip are clobbered.
*/
function idct_col_armv6
        push   {r1, lr}              /* idct_row clobbers r1 */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1}
        idct_finish_shift COL_SHIFT

        /* r4--r7 = sums for slots 0--3, r11--r8 = differences for slots 4--7 */
        strh   r4, [r1]
        strh   r5, [r1, #(16*1)]
        strh   r6, [r1, #(16*2)]
        strh   r7, [r1, #(16*3)]
        strh   r11,[r1, #(16*4)]
        strh   r10,[r1, #(16*5)]
        strh   r9, [r1, #(16*6)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row, store saturated 8-bit.
  r0 = source
  r1 = dest
  r2 = line size

  Eight bytes are written, one per line.  r0, r1 and r2 hold their entry
  values on return; r3--r11 and ip are clobbered.

  NOTE(review): strb_post is not defined in this file (presumably it comes
  from asm.S); it is used here as "store byte at [r1], then r1 += r2",
  which is what the final "sub r1, r1, r2, lsl #3" undoes -- confirm.
*/
function idct_col_put_armv6
        push   {r1, r2, lr}          /* idct_row clobbers r1 and r2 */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish_shift_sat COL_SHIFT

        /* r4--r7 = lines 0--3, r11--r8 = lines 4--7 */
        strb_post r4, r1, r2
        strb_post r5, r1, r2
        strb_post r6, r1, r2
        strb_post r7, r1, r2
        strb_post r11,r1, r2
        strb_post r10,r1, r2
        strb_post r9, r1, r2
        strb_post r8, r1, r2

        sub    r1, r1, r2, lsl #3    /* rewind r1 by 8 lines */

        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row, add/store saturated 8-bit.
  r0 = source
  r1 = dest
  r2 = line size

  For each of the eight lines the existing destination byte is loaded, the
  IDCT output (arithmetically shifted right by COL_SHIFT) is added, and the
  sum is clamped to 0..255 and stored back.  r0, r1 and r2 hold their entry
  values on return; r3--r11 and ip are clobbered.

  In the comments below dest[n] is the byte at dest + n * line size.

  NOTE(review): strb_post is not defined in this file (presumably it comes
  from asm.S); it is used here as "store byte at [r1], then r1 += r2",
  which is what the final "sub r1, r1, r2, lsl #3" undoes -- confirm.
*/
function idct_col_add_armv6
        push   {r1, r2, lr}          /* idct_row clobbers r1 and r2 */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, =W42              /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish                  /* ip, r4, r5, r6 = lines 0--3;
                                        r10, r9, r8, lr = lines 4--7 */

        ldrb   r3, [r1]              /* r3  = dest[0] */
        ldrb   r7, [r1, r2]          /* r7  = dest[1] */
        add    ip, r3, ip, asr #COL_SHIFT
        usat   ip, #8, ip
        add    r4, r7, r4, asr #COL_SHIFT
        strb_post ip, r1, r2         /* store line 0 */
        ldrb   ip, [r1, r2]          /* ip  = dest[2] */
        usat   r4, #8, r4
        ldrb   r11,[r1, r2, lsl #2]  /* r11 = dest[5] */
        add    r5, ip, r5, asr #COL_SHIFT
        usat   r5, #8, r5
        strb_post r4, r1, r2         /* store line 1 */
        ldrb   r3, [r1, r2]          /* r3  = dest[3] */
        ldrb   ip, [r1, r2, lsl #2]  /* ip  = dest[6] */
        strb_post r5, r1, r2         /* store line 2 */
        ldrb   r7, [r1, r2]          /* r7  = dest[4] */
        ldrb   r4, [r1, r2, lsl #2]  /* r4  = dest[7] */
        add    r6, r3, r6, asr #COL_SHIFT
        usat   r6, #8, r6
        add    r10,r7, r10,asr #COL_SHIFT
        usat   r10,#8, r10
        add    r9, r11,r9, asr #COL_SHIFT
        usat   r9, #8, r9
        add    r8, ip, r8, asr #COL_SHIFT
        usat   r8, #8, r8
        add    lr, r4, lr, asr #COL_SHIFT
        usat   lr, #8, lr
        strb_post r6, r1, r2         /* store line 3 */
        strb_post r10,r1, r2         /* store line 4 */
        strb_post r9, r1, r2         /* store line 5 */
        strb_post r8, r1, r2         /* store line 6 */
        strb_post lr, r1, r2         /* store line 7 */

        sub    r1, r1, r2, lsl #3    /* rewind r1 by 8 lines */

        pop    {pc}
endfunc

/*
  Compute 8 IDCT row transforms.
  func = IDCT row->col function
  width = width of columns in bytes

  func is called eight times with r0 at byte offsets
  0, 32, 64, 96, 16, 48, 80, 112 from its entry value, and with r1
  advanced by width between calls.  On exit r0 is back at its entry value;
  r1 is left at entry + 7*width.  func must preserve r0 and r1.
*/
        .macro idct_rows func width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        sub    r0, r0, #(16*5)       /* from offset 16*6 back to 16*1 */
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func

        sub    r0, r0, #(16*7)       /* restore r0 to its entry value */
        .endm

/* void ff_simple_idct_armv6(int16_t *data); */
/*
  In-place IDCT: the row pass writes into a 128-byte buffer on the stack,
  the column pass reads that buffer and writes the result back to data.
*/
function ff_simple_idct_armv6, export=1
        push   {r4-r11, lr}
        sub    sp, sp, #128          /* 128-byte intermediate buffer */

        mov    r1, sp                /* row pass: data -> buffer */
        idct_rows idct_row_armv6, 2
        mov    r1, r0                /* r0 is back at data after idct_rows */
        mov    r0, sp                /* column pass: buffer -> data */
        idct_rows idct_col_armv6, 2

        add    sp, sp, #128
        pop    {r4-r11, pc}
endfunc

/* void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data); */
/*
  IDCT of data, added to the bytes at dest with saturation to 8 bits.
  The row pass writes into a 128-byte buffer on the stack; the column pass
  reads that buffer and updates dest, one byte column per call.
*/
function ff_simple_idct_add_armv6, export=1
        push   {r0, r1, r4-r11, lr}  /* dest and line_size end up lowest on the stack */
        sub    sp, sp, #128          /* 128-byte intermediate buffer */

        mov    r0, r2                /* row pass: data -> buffer */
        mov    r1, sp
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* column pass: buffer -> dest */
        ldr    r1, [sp, #128]        /* r1 = saved dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = saved line_size */
        idct_rows idct_col_add_armv6, 1

        add    sp, sp, #(128+8)      /* drop the buffer and the saved r0, r1 */
        pop    {r4-r11, pc}
endfunc

/* void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data); */
/*
  IDCT of data, stored to dest as bytes saturated to 8 bits.
  The row pass writes into a 128-byte buffer on the stack; the column pass
  reads that buffer and writes dest, one byte column per call.
*/
function ff_simple_idct_put_armv6, export=1
        push   {r0, r1, r4-r11, lr}  /* dest and line_size end up lowest on the stack */
        sub    sp, sp, #128          /* 128-byte intermediate buffer */

        mov    r0, r2                /* row pass: data -> buffer */
        mov    r1, sp
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* column pass: buffer -> dest */
        ldr    r1, [sp, #128]        /* r1 = saved dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = saved line_size */
        idct_rows idct_col_put_armv6, 1

        add    sp, sp, #(128+8)      /* drop the buffer and the saved r0, r1 */
        pop    {r4-r11, pc}
endfunc