/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"

/*
 * Fixed-point IDCT coefficients, Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5.
 * NOTE(review): W4 is 16383 although the formula yields 16384 for i = 4;
 * presumably intentional -- confirm against the reference IDCT before
 * "correcting" it.
 */
#define W1  22725   /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* see note above */
#define W5  12873   /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */

/* Rounding/shift amounts used by the row pass and the column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20

/*
 * Coefficient pairs packed for the dual 16-bit multiply instructions:
 * Wab holds Wa in the bottom halfword and Wb in the top halfword.
 * W42n holds the negated pair; the low half is masked to 16 bits so the
 * sign extension of -W4 does not spill into the top halfword.  The inner
 * parentheses make the grouping explicit instead of relying on the
 * assembler evaluating '&' and '|' left to right.
 */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W42 (W4 | (W2 << 16))
#define W42n ((-W4 & 0xffff) | (-W2 << 16))
#define W46 (W4 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

/*
 * Literal table.  The code below fetches these words with label-relative
 * loads such as "ldr lr, w46", so the table lives in .text next to it.
 */
        .text
        .align
w13:    .long W13
w26:    .long W26
w42:    .long W42
w42n:   .long W42n
w46:    .long W46
w57:    .long W57

/*
  Compute partial IDCT of single row.

  Notation: row[a,b] is a register holding row[a] in its top halfword and
  row[b] in its bottom halfword.

  shift = number of bits the caller shifts the results right afterwards;
          used here only to form the rounding bias 1 << (shift-1)

  Input:
    r0 = source address (row[7,5] is read from [r0, #12],
                         row[6,4] from [r0, #4])
    r2 = row[2,0] <= 2 cycles
    r3 = row[3,1]
    ip = w42      <= 2 cycles

  Output in registers r4--r11:
    r4--r7 = A0--A3 (rounding bias included)
    r8 = B0, r9 = -B1, r10 = B2, r11 = B3

  Clobbers r1, r2, r3, ip, lr.
*/
        .macro idct_row shift
        ldr    lr, w46               /* lr  = W4 | (W6 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, w13               /* ip  = W1 | (W3 << 16) */
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */

        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        ldr    lr, [r0, #12]         /* lr  =  row[7,5] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smlad  r8, lr, r10,r8        /* B0  +=      W5*row[5] + W7*row[7] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */

        ldr    r3, w42n              /* r3 =  -W4 | (-W2 << 16) */
        smlad  r10,lr, r2, r10       /* B2 +=  W7*row[5] + W3*row[7] */
        ldr    r2, [r0, #4]          /* r2 =   row[6,4] */
        smlsdx r11,lr, ip, r11       /* B3 +=  W3*row[5] - W1*row[7] */
        ldr    ip, w46               /* ip =   W4 | (W6 << 16) */
        smlad  r9, lr, r1, r9        /* B1 -=  W1*row[5] + W5*row[7] (r9 holds -B1) */

        smlad  r5, r2, r3, r5        /* A1 += -W4*row[4] - W2*row[6] */
        smlsd  r6, r2, r3, r6        /* A2 += -W4*row[4] + W2*row[6] */
        smlad  r4, r2, ip, r4        /* A0 +=  W4*row[4] + W6*row[6] */
        smlsd  r7, r2, ip, r7        /* A3 +=  W4*row[4] - W6*row[6] */
        .endm

/*
  Compute partial IDCT of half row, i.e. using row[0]--row[3] only.
  Nothing is loaded from memory except the coefficient words.

  shift = number of bits the caller shifts the results right afterwards;
          used here only to form the rounding bias 1 << (shift-1)

  Input:
    r2 = row[2,0]
    r3 = row[3,1]
    ip = w42

  Output in registers r4--r11:
    r4--r7 = A0--A3 (rounding bias included)
    r8 = B0, r9 = -B1, r10 = B2, r11 = B3

  Clobbers r1, r2, ip, lr.
*/
        .macro idct_row4 shift
        ldr    lr, w46               /* lr =  W4 | (W6 << 16) */
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  =  A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  =  A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, w13               /* ip =  W1 | (W3 << 16) */
        smlad  r5, r2, lr, r1        /* r5  =  A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  =  A2 = W4*row[0] - W6*row[2] + bias */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
        .endm

/*
  Compute final part of IDCT single row without shift.

  Input in registers r4--r11, as left by idct_row/idct_row4
  (r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3).

  Output in registers ip, r4--r6, lr, r8--r10; in output order 0..7:
    ip, r4, r5, r6, r10, r9, r8, lr
  r7 and r11 are left unchanged.
*/
        .macro idct_finish
        add    ip, r4, r8            /* ip  = A0 + B0 */
        sub    lr, r4, r8            /* lr  = A0 - B0 */
        sub    r4, r5, r9            /* r4  = A1 + B1 (r9 holds -B1) */
        add    r8, r5, r9            /* r8  = A1 - B1 */
        add    r5, r6, r10           /* r5  = A2 + B2 */
        sub    r9, r6, r10           /* r9  = A2 - B2 */
        add    r6, r7, r11           /* r6  = A3 + B3 */
        sub    r10,r7, r11           /* r10 = A3 - B3 */
        .endm

/*
  Compute final part of IDCT single row.

  shift = right-shift amount (arithmetic) applied to every result

  Input in registers r4--r11, as left by idct_row/idct_row4
  (r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3).

  Output in registers r4--r11; in output order 0..7:
    r4, r5, r6, r7, r11, r10, r9, r8

  Clobbers r2, r3.
*/
        .macro idct_finish_shift shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    r2, r4, r8            /* r2 = A0 - B0 */
        mov    r4, r3, asr #\shift
        mov    r8, r2, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    r2, r5, r9            /* r2 = A1 - B1 */
        mov    r5, r3, asr #\shift
        mov    r9, r2, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    r2, r6, r10           /* r2 = A2 - B2 */
        mov    r6, r3, asr #\shift
        mov    r10,r2, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    r2, r7, r11           /* r2 = A3 - B3 */
        mov    r7, r3, asr #\shift
        mov    r11,r2, asr #\shift
        .endm

/*
  Compute final part of IDCT single row, saturating results at 8 bits.

  shift = right-shift amount (arithmetic) applied before saturation

  Input in registers r4--r11, as left by idct_row/idct_row4
  (r4--r7 = A0--A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3).

  Output in registers r4--r11, each saturated to the unsigned 8-bit range;
  in output order 0..7:
    r4, r5, r6, r7, r11, r10, r9, r8

  Clobbers r3, ip.
*/
        .macro idct_finish_shift_sat shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    ip, r4, r8            /* ip = A0 - B0 */
        usat   r4, #8, r3, asr #\shift
        usat   r8, #8, ip, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    ip, r5, r9            /* ip = A1 - B1 */
        usat   r5, #8, r3, asr #\shift
        usat   r9, #8, ip, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    ip, r6, r10           /* ip = A2 - B2 */
        usat   r6, #8, r3, asr #\shift
        usat   r10,#8, ip, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    ip, r7, r11           /* ip = A3 - B3 */
        usat   r7, #8, r3, asr #\shift
        usat   r11,#8, ip, asr #\shift
        .endm

/*
  Compute IDCT of single row, storing as column.

  r0 = source
  r1 = dest

  The eight results are stored as halfwords 16 bytes apart, starting at r1.
  Three paths:
    - every coefficient except row[0] is zero: row[0] << 3 is stored to all
      eight positions (label 1);
    - the words row[7,5] and row[6,4] are zero: idct_row4 (label 2);
    - otherwise: full idct_row.

  r0 and r1 are preserved.  Clobbers r2--r11 and ip; this internal routine
  does not save r4--r11, so its callers must.
*/
function idct_row_armv6
        push   {lr}

        ldr    lr, [r0, #12]         /* lr = row[7,5] */
        ldr    ip, [r0, #4]          /* ip = row[6,4] */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        ldr    r2, [r0]              /* r2 = row[2,0] */
        orrs   lr, lr, ip            /* lr = row[7,5] | row[6,4], sets Z if zero */
        itt    eq                    /* the next two compares only run while Z is set */
        cmpeq  lr, r3                /* lr is 0 here: is row[3,1] zero too? */
        cmpeq  lr, r2, lsr #16       /* ... and row[2]? */
        beq    1f                    /* only row[0] can be non-zero */
        push   {r1}                  /* idct_row/idct_row4 use r1 as scratch */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        cmp    lr, #0
        beq    2f                    /* row[4]--row[7] all zero */

        idct_row   ROW_SHIFT
        b      3f

2:      idct_row4  ROW_SHIFT

3:      pop    {r1}
        idct_finish_shift ROW_SHIFT

        /* Outputs 0..7 are in r4, r5, r6, r7, r11, r10, r9, r8. */
        strh   r4, [r1]
        strh   r5, [r1, #(16*2)]
        strh   r6, [r1, #(16*4)]
        strh   r7, [r1, #(16*6)]
        strh   r11,[r1, #(16*1)]
        strh   r10,[r1, #(16*3)]
        strh   r9, [r1, #(16*5)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}

        /* DC-only row: strh stores the low halfword, i.e. row[0] << 3. */
1:      mov    r2, r2, lsl #3
        strh   r2, [r1]
        strh   r2, [r1, #(16*2)]
        strh   r2, [r1, #(16*4)]
        strh   r2, [r1, #(16*6)]
        strh   r2, [r1, #(16*1)]
        strh   r2, [r1, #(16*3)]
        strh   r2, [r1, #(16*5)]
        strh   r2, [r1, #(16*7)]
        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row.

  r0 = source
  r1 = dest

  Always runs the full idct_row with COL_SHIFT and stores the eight results
  as halfwords 16 bytes apart, starting at r1.

  r0 and r1 are preserved.  Clobbers r2--r11 and ip; this internal routine
  does not save r4--r11, so its callers must.
*/
function idct_col_armv6
        push   {r1, lr}              /* idct_row uses r1 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1}
        idct_finish_shift COL_SHIFT

        /* Outputs 0..7 are in r4, r5, r6, r7, r11, r10, r9, r8. */
        strh   r4, [r1]
        strh   r5, [r1, #(16*1)]
        strh   r6, [r1, #(16*2)]
        strh   r7, [r1, #(16*3)]
        strh   r11,[r1, #(16*4)]
        strh   r10,[r1, #(16*5)]
        strh   r9, [r1, #(16*6)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row, store saturated 8-bit.

  r0 = source
  r1 = dest
  r2 = line size

  NOTE(review): strb_post is not defined in this file (presumably it comes
  from asm.S).  It appears to store a byte at [r1] and then advance r1 by
  r2; the closing "sub r1, r1, r2, lsl #3" is consistent with eight such
  advances being undone, so r1 ends up unchanged -- confirm.

  r0 and r2 are preserved.  Clobbers r3--r11 and ip; this internal routine
  does not save r4--r11, so its callers must.
*/
function idct_col_put_armv6
        push   {r1, r2, lr}          /* idct_row uses r1 and r2 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish_shift_sat COL_SHIFT

        /* Outputs 0..7 are in r4, r5, r6, r7, r11, r10, r9, r8. */
        strb_post r4, r1, r2
        strb_post r5, r1, r2
        strb_post r6, r1, r2
        strb_post r7, r1, r2
        strb_post r11,r1, r2
        strb_post r10,r1, r2
        strb_post r9, r1, r2
        strb_post r8, r1, r2

        sub    r1, r1, r2, lsl #3    /* step r1 back by 8 * line size */

        pop    {pc}
endfunc

/*
  Compute IDCT of single column, read as row, add/store saturated 8-bit.

  r0 = source
  r1 = dest
  r2 = line size

  Each result is shifted right by COL_SHIFT, added to the byte already at
  the destination, saturated to the unsigned 8-bit range and stored back.
  Loads of later destination bytes are interleaved with the arithmetic and
  stores of earlier ones.

  NOTE(review): strb_post is not defined in this file (presumably it comes
  from asm.S).  The dest[n] annotations below assume it stores a byte at
  [r1] and then advances r1 by r2, which matches the closing
  "sub r1, r1, r2, lsl #3" -- confirm.  dest[n] means the byte n lines
  below the r1 passed in.

  r0 and r2 are preserved.  Clobbers r3--r11 and ip; this internal routine
  does not save r4--r11, so its callers must.
*/
function idct_col_add_armv6
        push   {r1, r2, lr}          /* idct_row uses r1 and r2 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish                  /* outputs 0..7 in ip, r4, r5, r6, r10, r9, r8, lr */

        ldrb   r3, [r1]              /* r3  = dest[0] */
        ldrb   r7, [r1, r2]          /* r7  = dest[1] */
        ldrb   r11,[r1, r2, lsl #2]  /* r11 = dest[4]; r11 is reloaded below before any use */
        add    ip, r3, ip, asr #COL_SHIFT
        usat   ip, #8, ip            /* ip  = output 0 */
        add    r4, r7, r4, asr #COL_SHIFT
        strb_post ip, r1, r2
        ldrb   ip, [r1, r2]          /* ip  = dest[2] */
        usat   r4, #8, r4            /* r4  = output 1 */
        ldrb   r11,[r1, r2, lsl #2]  /* r11 = dest[5] */
        add    r5, ip, r5, asr #COL_SHIFT
        usat   r5, #8, r5            /* r5  = output 2 */
        strb_post r4, r1, r2
        ldrb   r3, [r1, r2]          /* r3  = dest[3] */
        ldrb   ip, [r1, r2, lsl #2]  /* ip  = dest[6] */
        strb_post r5, r1, r2
        ldrb   r7, [r1, r2]          /* r7  = dest[4] */
        ldrb   r4, [r1, r2, lsl #2]  /* r4  = dest[7] */
        add    r6, r3, r6, asr #COL_SHIFT
        usat   r6, #8, r6            /* r6  = output 3 */
        add    r10,r7, r10,asr #COL_SHIFT
        usat   r10,#8, r10           /* r10 = output 4 */
        add    r9, r11,r9, asr #COL_SHIFT
        usat   r9, #8, r9            /* r9  = output 5 */
        add    r8, ip, r8, asr #COL_SHIFT
        usat   r8, #8, r8            /* r8  = output 6 */
        add    lr, r4, lr, asr #COL_SHIFT
        usat   lr, #8, lr            /* lr  = output 7 */
        strb_post r6, r1, r2
        strb_post r10,r1, r2
        strb_post r9, r1, r2
        strb_post r8, r1, r2
        strb_post lr, r1, r2

        sub    r1, r1, r2, lsl #3    /* step r1 back by 8 * line size */

        pop    {pc}
endfunc

/*
  Compute 8 IDCT row transforms.

  func = IDCT row->col function, called with r0 = source, r1 = dest
  width = width of columns in bytes

  func is called eight times.  r0 visits byte offsets
  0, 32, 64, 96, 16, 48, 80, 112 from its initial value and is restored at
  the end; r1 advances by width between calls and is left at its initial
  value + 7*width.  func must preserve r0 and r1.  Each bl overwrites lr.
*/
        .macro idct_rows func width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        sub    r0, r0, #(16*5)       /* from offset 96 back to offset 16 */
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func

        sub    r0, r0, #(16*7)       /* restore r0 to its initial value */
        .endm

/*
  void ff_simple_idct_armv6(DCTELEM *data);

  r0 = data

  In-place transform: the row pass writes into a 128-byte temporary on the
  stack, and the column pass reads that temporary and writes back to data.
  r4--r11 are saved here because the row/column helpers overwrite them.
*/
function ff_simple_idct_armv6, export=1
        push   {r4-r11, lr}
        sub    sp, sp, #128          /* 128-byte temporary block */

        mov    r1, sp                /* row pass: data -> temporary */
        idct_rows idct_row_armv6, 2
        mov    r1, r0                /* column pass: temporary -> data */
        mov    r0, sp
        idct_rows idct_col_armv6, 2

        add    sp, sp, #128
        pop    {r4-r11, pc}
endfunc

/*
  void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);

  r0 = dest
  r1 = line_size
  r2 = data

  The row pass writes into a 128-byte temporary on the stack; the column
  pass reads it and adds the results to dest via idct_col_add_armv6.

  Stack layout after the prologue:
    [sp, #0 .. #127]  temporary block
    [sp, #128]        saved r0 (dest)
    [sp, #132]        saved r1 (line_size)
    above             saved r4--r11, lr
*/
function ff_simple_idct_add_armv6, export=1
        push   {r0, r1, r4-r11, lr}
        sub    sp, sp, #128

        mov    r0, r2                /* row pass: data -> temporary */
        mov    r1, sp
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* column pass: temporary -> dest */
        ldr    r1, [sp, #128]        /* r1 = dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = line_size */
        idct_rows idct_col_add_armv6, 1

        add    sp, sp, #(128+8)      /* drop the temporary and the saved r0/r1 */
        pop    {r4-r11, pc}
endfunc

/*
  void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);

  r0 = dest
  r1 = line_size
  r2 = data

  The row pass writes into a 128-byte temporary on the stack; the column
  pass reads it and stores the saturated results to dest via
  idct_col_put_armv6.

  Stack layout after the prologue:
    [sp, #0 .. #127]  temporary block
    [sp, #128]        saved r0 (dest)
    [sp, #132]        saved r1 (line_size)
    above             saved r4--r11, lr
*/
function ff_simple_idct_put_armv6, export=1
        push   {r0, r1, r4-r11, lr}
        sub    sp, sp, #128

        mov    r0, r2                /* row pass: data -> temporary */
        mov    r1, sp
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* column pass: temporary -> dest */
        ldr    r1, [sp, #128]        /* r1 = dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = line_size */
        idct_rows idct_col_put_armv6, 1

        add    sp, sp, #(128+8)      /* drop the temporary and the saved r0/r1 */
        pop    {r4-r11, pc}
endfunc