/****************************************************************************
 * Assembly testing and benchmarking tool
 * Copyright (c) 2015 Martin Storsjo
 * Copyright (c) 2015 Janne Grunau
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

#include "libavutil/arm/asm.S"

/*
 * override fpu so that NEON instructions are rejected
 *
 * NOTE(review): FPU and ELF are presumably line-prefix macros provided by
 * libavutil/arm/asm.S that enable the directive only where it is supported;
 * confirm against that header.
 */
#if HAVE_VFP
FPU     .fpu            vfp
ELF     .eabi_attribute 10, 0           @ suppress Tag_FP_arch
#endif

/*
 * Reference pattern for the callee-saved registers.
 *
 * The checked-call wrappers generated by the clobbercheck macro load the
 * first 32 bytes into r4-r11 and, for the vfp variant, all 64 bytes into
 * d8-d15 before calling the function under test. After the call the
 * registers are compared against this table again.
 *
 * NOTE(review): align=3 presumably requests 8-byte (2^3) alignment, which
 * the ldrd accesses to this table rely on; confirm in the const macro.
 */
const register_init, align=3
    .quad 0x21f86d66c8ca00ce
    .quad 0x75b6ba21077c48ad
    .quad 0xed56bb2dcb3c7736
    .quad 0x8bda43d3fd1a7e06
    .quad 0xb64a9c9e5d318408
    .quad 0xdf9a54b303f1d3a3
    .quad 0x4a75479abd64e097
    .quad 0x249214109d5d1c88
endconst

/*
 * Format strings handed to checkasm_fail_func in r0 when a register was
 * not preserved. The single format argument is passed in r1: the changed
 * FPSCR bits for the first message, the register number for the other two.
 *
 * error_message_gpr and error_message_vfp are plain labels placed inside
 * the same const ... endconst block as error_message_fpscr.
 */
const error_message_fpscr
    .asciz "failed to preserve register FPSCR, changed bits: %x"
error_message_gpr:
    .asciz "failed to preserve register r%d"
error_message_vfp:
    .asciz "failed to preserve register d%d"
endconst

@ max number of args used by any asm function.
#define MAX_ARGS 15

@ stack bytes needed for the arguments that are not passed in r0-r3
@ (parenthesized so that the expansion is safe inside larger expressions)
#define ARG_STACK (4*(MAX_ARGS - 4))

@ align the used stack space to 8 to preserve the stack alignment
@ (pushed is an assembler symbol set with .equ where this is expanded)
#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)

/*
 * clobbercheck variant
 *
 * Emits the exported function checkasm_checked_call_<variant>, a wrapper
 * that calls another function and verifies that it preserved the
 * callee-saved registers.
 *
 * Inputs of the emitted function, as consumed by the code below:
 *   r0          address of the function to call
 *   r1          not used
 *   r2, r3      passed on to the callee as r0, r1
 *   [sp, #0]    the first two stack words are passed on as r2, r3
 *   [sp, #8]    the following MAX_ARGS-4 words are copied to the stack
 *               seen by the callee
 * Output: r0/r1 as left by the callee.
 *
 * Before the call, r4-r11 (and d8-d15 for the vfp variant) are filled from
 * register_init. Afterwards they are compared against that table, and for
 * the vfp variant FPSCR is compared against its value on entry. On a
 * mismatch checkasm_fail_func is called with a format string in r0 and the
 * register number (or the changed FPSCR bits) in r1.
 *
 * The assembler symbol "pushed" holds the number of bytes pushed by the
 * prologue; ARG_STACK_A and the stack offsets below depend on it.
 */
.macro clobbercheck variant
@ r4-r11 and lr
.equ pushed, 4*9
function checkasm_checked_call_\variant, export=1
    push        {r4-r11, lr}
.ifc \variant, vfp
    vpush       {d8-d15}
    @ save FPSCR on entry; it is compared after the call and restored on exit
    fmrx        r4,  FPSCR
    push        {r4}
@ d8-d15 plus the FPSCR word
.equ pushed, pushed + 16*4 + 4
.endif

    @ fill the callee-saved registers with the reference pattern
    movrel      r12, register_init
.ifc \variant, vfp
    vldm        r12, {d8-d15}
.endif
    ldm         r12, {r4-r11}

    @ copy the stack arguments behind the first two to the new stack top
    sub         sp,  sp,  #ARG_STACK_A
.equ pos, 0
.rept MAX_ARGS-4
    ldr         r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
    str         r12, [sp, #pos]
.equ pos, pos + 4
.endr

    @ shift the arguments down by two slots and call the function from r0
    mov         r12, r0
    mov         r0,  r2
    mov         r1,  r3
    ldrd        r2,  r3,  [sp, #ARG_STACK_A + pushed]
    blx         r12
    add         sp,  sp,  #ARG_STACK_A

    @ keep the return value while r0-r3 and lr serve as scratch registers
    push        {r0, r1}
    movrel      r12, register_init
.ifc \variant, vfp
.macro check_reg_vfp, dreg, offset
    ldrd        r2,  r3,  [r12, #8 * (\offset)]
    vmov        r0,  lr,  \dreg
    eor         r2,  r2,  r0
    eor         r3,  r3,  lr
    orrs        r2,  r2,  r3
    bne         4f
.endm

.irp n, 8, 9, 10, 11, 12, 13, 14, 15
    @ keep track of the checked double/SIMD register
    mov         r1,  #\n
    check_reg_vfp d\n, \n-8
.endr
.purgem check_reg_vfp

    fmrx        r1,  FPSCR
    @ the FPSCR value saved on entry sits right above the pushed r0/r1 pair
    ldr         r3,  [sp, #8]
    eor         r1,  r1,  r3
    @ Ignore changes in bits 0-4 and 7
    bic         r1,  r1,  #0x9f
    @ Ignore changes in the topmost 5 bits
    bics        r1,  r1,  #0xf8000000
    bne         3f
.endif

    @ keep track of the checked GPR
    mov         r1,  #4
    @ check_reg compares one or two GPRs against the next 8 table bytes;
    @ r1 is advanced by two in either case so the numbering stays in sync
.macro check_reg reg1, reg2=
    ldrd        r2,  r3,  [r12], #8
    eors        r2,  r2,  \reg1
    bne         2f
    add         r1,  r1,  #1
.ifnb \reg2
    eors        r3,  r3,  \reg2
    bne         2f
.endif
    add         r1,  r1,  #1
.endm
    check_reg   r4,  r5
    check_reg   r6,  r7
@ r9 is a volatile register in the ios ABI
#ifdef __APPLE__
    check_reg   r8
#else
    check_reg   r8,  r9
#endif
    check_reg   r10, r11
.purgem check_reg

    b           0f
4:
    movrel      r0, error_message_vfp
    b           1f
3:
    movrel      r0, error_message_fpscr
    b           1f
2:
    movrel      r0, error_message_gpr
1:
    @ In the novfp variant sp is at entry - 44 here (36 bytes of prologue
    @ plus the r0/r1 pair), which is not 8-byte aligned. Realign it for
    @ the call into C. r4 and r12 are dead at this point: r4-r11 are
    @ reloaded from the stack in the epilogue, and r4 is callee-saved so
    @ the original sp survives the call.
    mov         r4,  sp
    bic         r12, r4,  #7
    mov         sp,  r12
    blx         X(checkasm_fail_func)
    mov         sp,  r4
0:
    pop         {r0, r1}
.ifc \variant, vfp
    pop         {r2}
    fmxr        FPSCR, r2
    vpop        {d8-d15}
.endif
    pop         {r4-r11, pc}
endfunc
.endm

/*
 * Instantiate the wrappers. checkasm_checked_call_vfp, which additionally
 * checks d8-d15 and FPSCR, is only emitted when HAVE_VFP or HAVE_NEON is
 * set; checkasm_checked_call_novfp is always emitted.
 */
#if HAVE_VFP || HAVE_NEON
clobbercheck vfp
#endif
clobbercheck novfp