/*
 * Copyright (C) 2003 David S. Miller <davem@redhat.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* You may be asking why I hard-code the instruction opcodes and don't
 * use the normal VIS assembler mnemonics for the VIS instructions.
 *
 * The reason is that Sun, in their infinite wisdom, decided that a binary
 * using a VIS instruction will cause it to be marked (in the ELF headers)
 * as doing so, and this prevents the OS from loading such binaries if the
 * current cpu doesn't have VIS.  There is no way to easily override this
 * behavior of the assembler that I am aware of.
 *
 * This totally defeats what libmpeg2 is trying to do which is allow a
 * single binary to be created, and then detect the availability of VIS
 * at runtime.
 *
 * I'm not saying that tainting the binary by default is bad, rather I'm
 * saying that not providing a way to override this easily unnecessarily
 * ties people's hands.
 *
 * Thus, we do the opcode encoding by hand and output 32-bit words in
 * the assembler to keep the binary from becoming tainted.
 */

#ifndef AVCODEC_SPARC_VIS_H
#define AVCODEC_SPARC_VIS_H

/*
 * Bit-field builders for the hand-assembled VIS instruction words.
 *
 * vis_opc_base sets bit 31 and places 0x36 at bit 19; vis_opf() places the
 * opf value at bit 5.  The rs1 / rs2 / rd helpers shift a register number
 * to bit 14, bit 0 and bit 25 respectively.
 *
 * The "_s" helpers use the register number unchanged (vis_sreg).  The "_d"
 * helpers go through vis_dreg(), which keeps the low five bits of the
 * register number and ORs its bit 5 in as bit 0 (32 -> 1, 34 -> 3, ...).
 *
 * The base constant is unsigned: shifting 1 into bit 31 of a signed int is
 * undefined behaviour in C.
 */
#define vis_opc_base    ((0x1U << 31) | (0x36U << 19))
#define vis_opf(X)      ((X) << 5)
#define vis_sreg(X)     (X)
#define vis_dreg(X)     (((X)&0x1f)|((X)>>5))
#define vis_rs1_s(X)    (vis_sreg(X) << 14)
#define vis_rs1_d(X)    (vis_dreg(X) << 14)
#define vis_rs2_s(X)    (vis_sreg(X) << 0)
#define vis_rs2_d(X)    (vis_dreg(X) << 0)
#define vis_rd_s(X)     (vis_sreg(X) << 25)
#define vis_rd_d(X)     (vis_dreg(X) << 25)
/*
 * Three-operand instruction emitters.
 *
 * Each macro expands to one asm statement that outputs a single ".word"
 * built from vis_opc_base, the opf value and the three register fields.
 * The instruction is emitted as data so that the assembler never sees a
 * VIS mnemonic (see the explanation at the top of this file).
 *
 * The macro suffix names the field encodings: the two letters before the
 * '2' describe rs1 and rs2, the letter after it describes rd.  's' selects
 * the vis_*_s() field helper and 'd' selects the vis_*_d() helper.
 *
 * All arguments must be integer constant expressions, because the finished
 * word is passed through an "i" asm constraint.  The asm statements declare
 * no outputs and no clobbers, so the compiler is not told which registers
 * the emitted instruction modifies.
 */
#define vis_ss2s(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_s(rd)))

#define vis_dd2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_d(rs1) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

#define vis_ss2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_d(rd)))

#define vis_sd2d(opf,rs1,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

/*
 * Two-operand instruction emitters (one source field plus rd).
 *
 * Same scheme as the three-operand emitters: one ".word" per use, built
 * from vis_opc_base, the opf value and two register fields; the source
 * field that is not named stays zero.
 *
 *   vis_d2s   rs2 via vis_rs2_d(), rd via vis_rd_s()
 *   vis_s2d   rs2 via vis_rs2_s(), rd via vis_rd_d()
 *   vis_d12d  rs1 via vis_rs1_d(), rd via vis_rd_d()
 *   vis_d22d  rs2 via vis_rs2_d(), rd via vis_rd_d()
 *   vis_s12s  rs1 via vis_rs1_s(), rd via vis_rd_s()
 *   vis_s22s  rs2 via vis_rs2_s(), rd via vis_rd_s()
 *
 * All arguments must be integer constant expressions ("i" constraint).
 */
#define vis_d2s(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_s(rd)))

#define vis_s2d(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_d(rd)))

#define vis_d12d(opf,rs1,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_d(rs1) | \
                                       vis_rd_d(rd)))

#define vis_d22d(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

#define vis_s12s(opf,rs1,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rd_s(rd)))

#define vis_s22s(opf,rs2,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_s(rd)))

/*
 * Destination-only instruction emitters: the word carries vis_opc_base,
 * the opf value and an rd field; both source fields stay zero.
 * vis_s() encodes rd with vis_rd_s(), vis_d() with vis_rd_d().
 * Both arguments must be integer constant expressions ("i" constraint).
 */
#define vis_s(opf,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rd_s(rd)))

#define vis_d(opf,rd) \
        __asm__ volatile (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rd_d(rd)))

/*
 * Register <-> memory transfers written with real assembler mnemonics.
 *
 * op is pasted in as the mnemonic and rd as the number following "%f" in
 * the register name, so both must be plain tokens, not expressions.
 *
 *   vis_r2m(op, rd, mem)            op  %f<rd>, [&mem]
 *   vis_r2m_2(op, rd, mem1, mem2)   op  %f<rd>, [mem1 + mem2]
 *   vis_m2r(op, mem, rd)            op  [&mem], %f<rd>
 *   vis_m2r_2(op, mem1, mem2, rd)   op  [mem1 + mem2], %f<rd>
 *
 * The single-address forms take an lvalue and pass its address; the "_2"
 * forms take the two address components as values.
 *
 * Only the address is passed as an operand, so the compiler cannot see
 * that the instruction reads or writes the pointed-to memory.  The
 * "memory" clobber keeps it from caching or reordering ordinary accesses
 * to that memory across the instruction, as vis_ldblk()/vis_stblk() in
 * this file already do.
 */
#define vis_r2m(op,rd,mem) \
        __asm__ volatile (#op "\t%%f" #rd ", [%0]" \
                              : : "r" (&(mem)) : "memory")

#define vis_r2m_2(op,rd,mem1,mem2) \
        __asm__ volatile (#op "\t%%f" #rd ", [%0 + %1]" \
                              : : "r" (mem1), "r" (mem2) : "memory")

#define vis_m2r(op,mem,rd) \
        __asm__ volatile (#op "\t[%0], %%f" #rd \
                              : : "r" (&(mem)) : "memory")

#define vis_m2r_2(op,mem1,mem2,rd) \
        __asm__ volatile (#op "\t[%0 + %1], %%f" #rd \
                              : : "r" (mem1), "r" (mem2) : "memory")
/**
 * Load a value into the GSR (the VIS status register the VIS_GSR_* masks
 * in this file describe).
 *
 * The instruction is emitted as a fixed ".word" whose template does not
 * reference the asm operand.  Pinning the value to %g1 with a local
 * register variable and listing it as an input is what makes it available
 * to the hard-coded instruction.
 * NOTE(review): 0xa7804000 presumably encodes "wr %g1, %g0, %gsr" --
 * confirm against the SPARC V9 / VIS manual.
 *
 * @param _val value to place in %g1 for the instruction
 */
static inline void vis_set_gsr(unsigned int _val)
{
        register unsigned int val __asm__("g1") = _val;

        __asm__ volatile(".word 0xa7804000"
                             : : "r" (val));
}

/*
 * Bit positions and masks of two GSR fields.  Each mask is the field's
 * width pattern moved to the field's shift, so mask and shift cannot drift
 * apart: ALIGNADDR is 3 bits at bit 0 (0x07), SCALEFACT is 4 bits at
 * bit 3 (0x78).
 */
#define VIS_GSR_ALIGNADDR_SHIFT         0
#define VIS_GSR_ALIGNADDR_MASK          (0x7 << VIS_GSR_ALIGNADDR_SHIFT)
#define VIS_GSR_SCALEFACT_SHIFT         3
#define VIS_GSR_SCALEFACT_MASK          (0xf << VIS_GSR_SCALEFACT_SHIFT)
/*
 * Loads and stores built on vis_m2r() / vis_r2m() and their "_2" forms.
 * The 32-bit variants use the ld / st mnemonics, the 64-bit variants use
 * ldd / std.  The "_2" variants address memory as mem1 + mem2.
 * Register arguments are the bare number that follows "%f".
 */
#define vis_ld32(mem,rd)                vis_m2r(ld, mem, rd)
#define vis_ld32_2(mem1,mem2,rd)        vis_m2r_2(ld, mem1, mem2, rd)
#define vis_st32(rs1,mem)               vis_r2m(st, rs1, mem)
#define vis_st32_2(rs1,mem1,mem2)       vis_r2m_2(st, rs1, mem1, mem2)
#define vis_ld64(mem,rd)                vis_m2r(ldd, mem, rd)
#define vis_ld64_2(mem1,mem2,rd)        vis_m2r_2(ldd, mem1, mem2, rd)
#define vis_st64(rs1,mem)               vis_r2m(std, rs1, mem)
#define vis_st64_2(rs1,mem1,mem2)       vis_r2m_2(std, rs1, mem1, mem2)
/*
 * Block load from the address of lvalue mem, with rd encoded by vis_rd_d().
 *
 * The instruction word is the constant 0xc1985e00 ORed by the assembler
 * with the rd field.  The address operand is never referenced in the
 * template; it reaches the instruction because the local register variable
 * pins it to %g1.  rd must be an integer constant expression.
 * NOTE(review): the constant presumably encodes "ldda [%g1] ASI_BLK_P" --
 * confirm against the UltraSPARC manual.
 *
 * The pointer is const-qualified so that loading from a const object does
 * not discard qualifiers.  The "memory" clobber orders the access against
 * surrounding C memory accesses.
 */
#define vis_ldblk(mem, rd) \
do {    register const void *vis_blk_addr_ __asm__("g1") = &(mem); \
        __asm__ volatile(".word 0xc1985e00 | %1" \
                             : \
                             : "r" (vis_blk_addr_), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)

/*
 * Block store to the address of lvalue mem, with rd encoded by vis_rd_d().
 *
 * The instruction word is the constant 0xc1b85e00 ORed by the assembler
 * with the rd field.  The address operand is never referenced in the
 * template; it reaches the instruction because the local register variable
 * pins it to %g1.  rd must be an integer constant expression.
 * NOTE(review): the constant presumably encodes "stda ..., [%g1] ASI_BLK_P"
 * -- confirm against the UltraSPARC manual.
 *
 * The "memory" clobber tells the compiler that memory is modified.
 */
#define vis_stblk(rd, mem) \
do {    register void *vis_blk_addr_ __asm__("g1") = &(mem); \
        __asm__ volatile(".word 0xc1b85e00 | %1" \
                             : \
                             : "r" (vis_blk_addr_), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)

/*
 * Memory barriers, emitted as fixed instruction words.  The "memory"
 * clobber additionally stops the compiler from moving memory accesses
 * across them.
 * NOTE(review): 0x8143e008 / 0x8143e040 presumably encode
 * "membar #StoreStore" / "membar #Sync" -- confirm against the SPARC V9
 * manual.
 */
#define vis_membar_storestore() \
        __asm__ volatile(".word 0x8143e008" : : : "memory")

#define vis_membar_sync() \
        __asm__ volatile(".word 0x8143e040" : : : "memory")
/* 16 and 32 bit partitioned addition and subtraction.  The normal
 * versions perform 4 16-bit or 2 32-bit additions or subtractions.
 * The 's' versions perform 2 16-bit or 1 32-bit additions or
 * subtractions.
 *
 * The normal versions are emitted through vis_dd2d(), the 's' versions
 * through vis_ss2s(); register arguments must be integer constants.
 */

#define vis_padd16(rs1,rs2,rd)          vis_dd2d(0x50, rs1, rs2, rd)
#define vis_padd16s(rs1,rs2,rd)         vis_ss2s(0x51, rs1, rs2, rd)
#define vis_padd32(rs1,rs2,rd)          vis_dd2d(0x52, rs1, rs2, rd)
#define vis_padd32s(rs1,rs2,rd)         vis_ss2s(0x53, rs1, rs2, rd)
#define vis_psub16(rs1,rs2,rd)          vis_dd2d(0x54, rs1, rs2, rd)
#define vis_psub16s(rs1,rs2,rd)         vis_ss2s(0x55, rs1, rs2, rd)
#define vis_psub32(rs1,rs2,rd)          vis_dd2d(0x56, rs1, rs2, rd)
#define vis_psub32s(rs1,rs2,rd)         vis_ss2s(0x57, rs1, rs2, rd)
/* Pixel formatting instructions.
 *
 * Each name maps to an opf value and to the emitter that matches its
 * operand field encodings; register arguments must be integer constants.
 */

#define vis_pack16(rs2,rd)              vis_d2s( 0x3b,      rs2, rd)
#define vis_pack32(rs1,rs2,rd)          vis_dd2d(0x3a, rs1, rs2, rd)
#define vis_packfix(rs2,rd)             vis_d2s( 0x3d,      rs2, rd)
#define vis_expand(rs2,rd)              vis_s2d( 0x4d,      rs2, rd)
#define vis_pmerge(rs1,rs2,rd)          vis_ss2d(0x4b, rs1, rs2, rd)
/* Partitioned multiply instructions.
 *
 * Each name maps to an opf value and to the emitter that matches its
 * operand field encodings; register arguments must be integer constants.
 */

#define vis_mul8x16(rs1,rs2,rd)         vis_sd2d(0x31, rs1, rs2, rd)
#define vis_mul8x16au(rs1,rs2,rd)       vis_ss2d(0x33, rs1, rs2, rd)
#define vis_mul8x16al(rs1,rs2,rd)       vis_ss2d(0x35, rs1, rs2, rd)
#define vis_mul8sux16(rs1,rs2,rd)       vis_dd2d(0x36, rs1, rs2, rd)
#define vis_mul8ulx16(rs1,rs2,rd)       vis_dd2d(0x37, rs1, rs2, rd)
#define vis_muld8sux16(rs1,rs2,rd)      vis_ss2d(0x38, rs1, rs2, rd)
#define vis_muld8ulx16(rs1,rs2,rd)      vis_ss2d(0x39, rs1, rs2, rd)
223 224 225

/* Alignment instructions.  */

226
static inline const void *vis_alignaddr(const void *_ptr)
227
{
228
        register const void *ptr __asm__("g1");
229

230
        ptr = _ptr;
231

232
        __asm__ volatile(".word %2"
233 234 235 236 237 238
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x18) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(1)));
239

240
        return ptr;
241 242 243 244
}

static inline void vis_alignaddr_g0(void *_ptr)
{
245
        register void *ptr __asm__("g1");
246

247
        ptr = _ptr;
248

249
        __asm__ volatile(".word %2"
250 251 252 253 254 255
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x18) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(0)));
256 257 258 259
}

static inline void *vis_alignaddrl(void *_ptr)
{
260
        register void *ptr __asm__("g1");
261

262
        ptr = _ptr;
263

264
        __asm__ volatile(".word %2"
265 266 267 268 269 270
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x19) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(1)));
271

272
        return ptr;
273 274 275 276
}

static inline void vis_alignaddrl_g0(void *_ptr)
{
277
        register void *ptr __asm__("g1");
278

279
        ptr = _ptr;
280

281
        __asm__ volatile(".word %2"
282 283 284 285 286 287
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x19) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(0)));
288 289
}

/* faligndata: opf 0x48, emitted through vis_dd2d(). */
#define vis_faligndata(rs1,rs2,rd)        vis_dd2d(0x48, rs1, rs2, rd)
/* Logical operate instructions.
 *
 * Names without a trailing 's' are emitted through the "_d" field
 * emitters (vis_d, vis_d12d, vis_d22d, vis_dd2d); names with a trailing
 * 's' use the matching "_s" emitters and the next higher opf value.
 * Register arguments must be integer constants.
 */

#define vis_fzero(rd)                   vis_d(   0x60,           rd)
#define vis_fzeros(rd)                  vis_s(   0x61,           rd)
#define vis_fone(rd)                    vis_d(   0x7e,           rd)
#define vis_fones(rd)                   vis_s(   0x7f,           rd)
#define vis_src1(rs1,rd)                vis_d12d(0x74, rs1,      rd)
#define vis_src1s(rs1,rd)               vis_s12s(0x75, rs1,      rd)
#define vis_src2(rs2,rd)                vis_d22d(0x78,      rs2, rd)
#define vis_src2s(rs2,rd)               vis_s22s(0x79,      rs2, rd)
#define vis_not1(rs1,rd)                vis_d12d(0x6a, rs1,      rd)
#define vis_not1s(rs1,rd)               vis_s12s(0x6b, rs1,      rd)
#define vis_not2(rs2,rd)                vis_d22d(0x66,      rs2, rd)
#define vis_not2s(rs2,rd)               vis_s22s(0x67,      rs2, rd)
#define vis_or(rs1,rs2,rd)              vis_dd2d(0x7c, rs1, rs2, rd)
#define vis_ors(rs1,rs2,rd)             vis_ss2s(0x7d, rs1, rs2, rd)
#define vis_nor(rs1,rs2,rd)             vis_dd2d(0x62, rs1, rs2, rd)
#define vis_nors(rs1,rs2,rd)            vis_ss2s(0x63, rs1, rs2, rd)
#define vis_and(rs1,rs2,rd)             vis_dd2d(0x70, rs1, rs2, rd)
#define vis_ands(rs1,rs2,rd)            vis_ss2s(0x71, rs1, rs2, rd)
#define vis_nand(rs1,rs2,rd)            vis_dd2d(0x6e, rs1, rs2, rd)
#define vis_nands(rs1,rs2,rd)           vis_ss2s(0x6f, rs1, rs2, rd)
#define vis_xor(rs1,rs2,rd)             vis_dd2d(0x6c, rs1, rs2, rd)
#define vis_xors(rs1,rs2,rd)            vis_ss2s(0x6d, rs1, rs2, rd)
#define vis_xnor(rs1,rs2,rd)            vis_dd2d(0x72, rs1, rs2, rd)
#define vis_xnors(rs1,rs2,rd)           vis_ss2s(0x73, rs1, rs2, rd)
#define vis_ornot1(rs1,rs2,rd)          vis_dd2d(0x7a, rs1, rs2, rd)
#define vis_ornot1s(rs1,rs2,rd)         vis_ss2s(0x7b, rs1, rs2, rd)
#define vis_ornot2(rs1,rs2,rd)          vis_dd2d(0x76, rs1, rs2, rd)
#define vis_ornot2s(rs1,rs2,rd)         vis_ss2s(0x77, rs1, rs2, rd)
#define vis_andnot1(rs1,rs2,rd)         vis_dd2d(0x68, rs1, rs2, rd)
#define vis_andnot1s(rs1,rs2,rd)        vis_ss2s(0x69, rs1, rs2, rd)
#define vis_andnot2(rs1,rs2,rd)         vis_dd2d(0x64, rs1, rs2, rd)
#define vis_andnot2s(rs1,rs2,rd)        vis_ss2s(0x65, rs1, rs2, rd)
/* Pixel component distance.
 *
 * Emitted through vis_dd2d() with opf 0x3e; register arguments must be
 * integer constants.
 */

#define vis_pdist(rs1,rs2,rd)           vis_dd2d(0x3e, rs1, rs2, rd)

#endif /* AVCODEC_SPARC_VIS_H */