Commit c855532a, authored by Ng Zhi An, committed by Commit Bot

Move FMA opcodes into a list macro

Bug: v8:9415
Bug: v8:10021
Change-Id: I77c24b58f575b612e5422bfcb9bb7ab83986659a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1986249
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65616}
parent d22326bb
......@@ -3019,6 +3019,7 @@ v8_source_set("v8_base_without_compiler") {
"src/codegen/x64/assembler-x64.h",
"src/codegen/x64/constants-x64.h",
"src/codegen/x64/cpu-x64.cc",
"src/codegen/x64/fma-instr.h",
"src/codegen/x64/interface-descriptors-x64.cc",
"src/codegen/x64/macro-assembler-x64.cc",
"src/codegen/x64/macro-assembler-x64.h",
......
......@@ -3466,74 +3466,22 @@ void Assembler::vbroadcastss(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
// Shared encoder behind the v*sd FMA3 wrappers, register-source form.
// Emits, in order: a VEX prefix built from dst/src1/src2 with
// kLIG / k66 / k0F38 / kW1, the opcode byte |op|, and the operand encoding
// for |dst| and |src2|. DCHECKs that FMA3 is enabled.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kLIG, k66, k0F38, kW1);
emit(op);
emit_sse_operand(dst, src2);
}
// Shared encoder behind the v*sd FMA3 wrappers, Operand-source form.
// Same byte sequence as the register overload (VEX prefix with
// kLIG / k66 / k0F38 / kW1, then |op|, then the operand encoding), except
// that |src2| is an Operand. DCHECKs that FMA3 is enabled.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
Operand src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kLIG, k66, k0F38, kW1);
emit(op);
emit_sse_operand(dst, src2);
}
// Shared encoder behind the v*ss FMA3 wrappers, register-source form.
// Emits, in order: a VEX prefix built from dst/src1/src2 with
// kLIG / k66 / k0F38 / kW0, the opcode byte |op|, and the operand encoding
// for |dst| and |src2|. DCHECKs that FMA3 is enabled.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kLIG, k66, k0F38, kW0);
emit(op);
emit_sse_operand(dst, src2);
}
// Shared encoder behind the v*ss FMA3 wrappers, Operand-source form.
// Same byte sequence as the register overload (VEX prefix with
// kLIG / k66 / k0F38 / kW0, then |op|, then the operand encoding), except
// that |src2| is an Operand. DCHECKs that FMA3 is enabled.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
Operand src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kLIG, k66, k0F38, kW0);
emit(op);
emit_sse_operand(dst, src2);
}
// Shared encoder behind the v*ps FMA3 wrappers, register-source form.
// Emits, in order: a VEX prefix built from dst/src1/src2 with
// kL128 / k66 / k0F38 / kW0, the opcode byte |op|, and the operand encoding
// for |dst| and |src2|. DCHECKs that FMA3 is enabled.
void Assembler::vfmaps(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F38, kW0);
emit(op);
emit_sse_operand(dst, src2);
}
// Shared encoder behind the v*ps FMA3 wrappers, Operand-source form.
// Same byte sequence as the register overload (VEX prefix with
// kL128 / k66 / k0F38 / kW0, then |op|, then the operand encoding), except
// that |src2| is an Operand. DCHECKs that FMA3 is enabled.
void Assembler::vfmaps(byte op, XMMRegister dst, XMMRegister src1,
Operand src2) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F38, kW0);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vfmapd(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
void Assembler::fma_instr(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2, VectorLength l, SIMDPrefix pp,
LeadingOpcode m, VexW w) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F38, kW1);
emit_vex_prefix(dst, src1, src2, l, pp, m, w);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vfmapd(byte op, XMMRegister dst, XMMRegister src1,
Operand src2) {
void Assembler::fma_instr(byte op, XMMRegister dst, XMMRegister src1,
Operand src2, VectorLength l, SIMDPrefix pp,
LeadingOpcode m, VexW w) {
DCHECK(IsEnabled(FMA3));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F38, kW1);
emit_vex_prefix(dst, src1, src2, l, pp, m, w);
emit(op);
emit_sse_operand(dst, src2);
}
......
......@@ -45,6 +45,7 @@
#include "src/codegen/assembler.h"
#include "src/codegen/label.h"
#include "src/codegen/x64/constants-x64.h"
#include "src/codegen/x64/fma-instr.h"
#include "src/codegen/x64/register-x64.h"
#include "src/codegen/x64/sse-instr.h"
#include "src/objects/smi.h"
......@@ -1138,185 +1139,23 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmovddup(XMMRegister dst, XMMRegister src);
void vmovddup(XMMRegister dst, Operand src);
void vbroadcastss(XMMRegister dst, Operand src);
// v*sd FMA3 mnemonics. Every wrapper forwards to vfmasd() with a fixed opcode
// byte; each mnemonic has a register-source and an Operand-source overload.
// Within a family the 132 / 213 / 231 forms use opcodes 0x9_, 0xa_ and 0xb_.

// vfmadd*sd: opcodes 0x99 / 0xa9 / 0xb9.
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
}
void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xa9, dst, src1, src2);
}
void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xb9, dst, src1, src2);
}
void vfmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0x99, dst, src1, src2);
}
void vfmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xa9, dst, src1, src2);
}
void vfmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xb9, dst, src1, src2);
}
// vfmsub*sd: opcodes 0x9b / 0xab / 0xbb.
void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9b, dst, src1, src2);
}
void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xab, dst, src1, src2);
}
void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbb, dst, src1, src2);
}
void vfmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0x9b, dst, src1, src2);
}
void vfmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xab, dst, src1, src2);
}
void vfmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xbb, dst, src1, src2);
}
// vfnmadd*sd: opcodes 0x9d / 0xad / 0xbd.
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9d, dst, src1, src2);
}
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xad, dst, src1, src2);
}
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbd, dst, src1, src2);
}
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0x9d, dst, src1, src2);
}
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xad, dst, src1, src2);
}
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xbd, dst, src1, src2);
}
// vfnmsub*sd: opcodes 0x9f / 0xaf / 0xbf.
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x9f, dst, src1, src2);
}
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xaf, dst, src1, src2);
}
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0xbf, dst, src1, src2);
}
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0x9f, dst, src1, src2);
}
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xaf, dst, src1, src2);
}
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmasd(0xbf, dst, src1, src2);
}
// Out-of-line encoders used by all v*sd wrappers above; |op| is the opcode
// byte passed by the wrapper.
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
// v*ss FMA3 mnemonics. Every wrapper forwards to vfmass() with a fixed opcode
// byte; each mnemonic has a register-source and an Operand-source overload.
// The opcode bytes are the same ones the v*sd wrappers pass to vfmasd().

// vfmadd*ss: opcodes 0x99 / 0xa9 / 0xb9.
void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x99, dst, src1, src2);
}
void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xa9, dst, src1, src2);
}
void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xb9, dst, src1, src2);
}
void vfmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0x99, dst, src1, src2);
}
void vfmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xa9, dst, src1, src2);
}
void vfmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xb9, dst, src1, src2);
}
// vfmsub*ss: opcodes 0x9b / 0xab / 0xbb.
void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9b, dst, src1, src2);
}
void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xab, dst, src1, src2);
}
void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbb, dst, src1, src2);
}
void vfmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0x9b, dst, src1, src2);
}
void vfmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xab, dst, src1, src2);
}
void vfmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xbb, dst, src1, src2);
}
// vfnmadd*ss: opcodes 0x9d / 0xad / 0xbd.
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9d, dst, src1, src2);
}
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xad, dst, src1, src2);
}
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbd, dst, src1, src2);
}
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0x9d, dst, src1, src2);
}
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xad, dst, src1, src2);
}
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xbd, dst, src1, src2);
}
// vfnmsub*ss: opcodes 0x9f / 0xaf / 0xbf.
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0x9f, dst, src1, src2);
}
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xaf, dst, src1, src2);
}
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmass(0xbf, dst, src1, src2);
}
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0x9f, dst, src1, src2);
}
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xaf, dst, src1, src2);
}
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmass(0xbf, dst, src1, src2);
}
// Out-of-line encoders used by all v*ss wrappers above; |op| is the opcode
// byte passed by the wrapper.
void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmass(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
// v*ps FMA3 mnemonics. Only the 231 forms of vfmadd (opcode 0xb8) and
// vfnmadd (opcode 0xbc) are provided; each forwards to vfmaps() and has a
// register-source and an Operand-source overload.
void vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmaps(0xb8, dst, src1, src2);
}
void vfmadd231ps(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmaps(0xb8, dst, src1, src2);
}
void vfnmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmaps(0xbc, dst, src1, src2);
}
void vfnmadd231ps(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmaps(0xbc, dst, src1, src2);
}
// Out-of-line encoders used by the v*ps wrappers above; |op| is the opcode
// byte passed by the wrapper.
void vfmaps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmaps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
// Generic FMA3 encoders (register-source and Operand-source overloads).
// |op| is the opcode byte; |l|, |pp|, |m| and |w| are the vector-length,
// SIMD-prefix, leading-opcode and VEX.W values forwarded to the VEX prefix.
void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
// v*pd FMA3 mnemonics. Each wrapper forwards to vfmapd() with a fixed opcode
// byte: 0xb8 for vfmadd231pd, 0xbc for vfnmadd231pd.
void vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmapd(0xb8, dst, src1, src2);
}
void vfmadd231pd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmapd(0xb8, dst, src1, src2);
}
void vfnmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmapd(0xbc, dst, src1, src2);
}
void vfnmadd231pd(XMMRegister dst, XMMRegister src1, Operand src2) {
vfmapd(0xbc, dst, src1, src2);
// Expands one FMA_INSTRUCTION_LIST row into the two public overloads of
// |instr| (XMM-register source and Operand source). Both forward to
// fma_instr(); the encoding arguments are built by token pasting:
//   hex               -> 0x<hex>
//   vlen              -> k<vlen>
//   simd              -> k<simd>
//   esc_hi, esc_lo    -> k<esc_hi><esc_lo>
//   vexw              -> k<vexw>
#define FMA(instr, vlen, simd, esc_hi, esc_lo, vexw, hex)                \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {      \
    fma_instr(0x##hex, dst, src1, src2,                                  \
              k##vlen, k##simd, k##esc_hi##esc_lo, k##vexw);             \
  }                                                                      \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {          \
    fma_instr(0x##hex, dst, src1, src2,                                  \
              k##vlen, k##simd, k##esc_hi##esc_lo, k##vexw);             \
  }
void vfmapd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vfmapd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
FMA_INSTRUCTION_LIST(FMA)
#undef FMA
void vmovd(XMMRegister dst, Register src);
void vmovd(XMMRegister dst, Operand src);
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#ifndef V8_CODEGEN_X64_FMA_INSTR_H_
#define V8_CODEGEN_X64_FMA_INSTR_H_
// X-macro table of the FMA3 instructions supported by the x64 assembler.
//
// Each row is V(instr, length, prefix, escape1, escape2, extension, opcode):
//   instr      - mnemonic; consumers use it as the generated method name.
//   length     - vector-length suffix, pasted by consumers as k<length>.
//   prefix     - SIMD-prefix suffix, pasted as k<prefix>.
//   escape1/2  - halves of the leading-opcode suffix, pasted as
//                k<escape1><escape2>.
//   extension  - VEX.W suffix, pasted as k<extension>.
//   opcode     - opcode byte in hex without the "0x" (pasted as 0x<opcode>).
//
// Scalar rows (...sd and ...ss) ignore the vector length and therefore all
// use LIG; packed rows (...ps and ...pd) operate on XMM registers and use
// L128.
#define FMA_INSTRUCTION_LIST(V)             \
  V(vfmadd132sd, LIG, 66, 0F, 38, W1, 99)   \
  V(vfmadd213sd, LIG, 66, 0F, 38, W1, a9)   \
  V(vfmadd231sd, LIG, 66, 0F, 38, W1, b9)   \
  V(vfmsub132sd, LIG, 66, 0F, 38, W1, 9b)   \
  V(vfmsub213sd, LIG, 66, 0F, 38, W1, ab)   \
  V(vfmsub231sd, LIG, 66, 0F, 38, W1, bb)   \
  V(vfnmadd132sd, LIG, 66, 0F, 38, W1, 9d)  \
  V(vfnmadd213sd, LIG, 66, 0F, 38, W1, ad)  \
  V(vfnmadd231sd, LIG, 66, 0F, 38, W1, bd)  \
  V(vfnmsub132sd, LIG, 66, 0F, 38, W1, 9f)  \
  V(vfnmsub213sd, LIG, 66, 0F, 38, W1, af)  \
  V(vfnmsub231sd, LIG, 66, 0F, 38, W1, bf)  \
  V(vfmadd132ss, LIG, 66, 0F, 38, W0, 99)   \
  V(vfmadd213ss, LIG, 66, 0F, 38, W0, a9)   \
  V(vfmadd231ss, LIG, 66, 0F, 38, W0, b9)   \
  V(vfmsub132ss, LIG, 66, 0F, 38, W0, 9b)   \
  V(vfmsub213ss, LIG, 66, 0F, 38, W0, ab)   \
  V(vfmsub231ss, LIG, 66, 0F, 38, W0, bb)   \
  V(vfnmadd132ss, LIG, 66, 0F, 38, W0, 9d)  \
  V(vfnmadd213ss, LIG, 66, 0F, 38, W0, ad)  \
  V(vfnmadd231ss, LIG, 66, 0F, 38, W0, bd)  \
  V(vfnmsub132ss, LIG, 66, 0F, 38, W0, 9f)  \
  V(vfnmsub213ss, LIG, 66, 0F, 38, W0, af)  \
  V(vfnmsub231ss, LIG, 66, 0F, 38, W0, bf)  \
  V(vfmadd231ps, L128, 66, 0F, 38, W0, b8)  \
  V(vfnmadd231ps, L128, 66, 0F, 38, W0, bc) \
  V(vfmadd231pd, L128, 66, 0F, 38, W1, b8)  \
  V(vfnmadd231pd, L128, 66, 0F, 38, W1, bc)
#endif // V8_CODEGEN_X64_FMA_INSTR_H_
......@@ -791,77 +791,12 @@ TEST(DisasmX64) {
{
if (CpuFeatures::IsSupported(FMA3)) {
CpuFeatureScope scope(&assm, FMA3);
__ vfmadd132sd(xmm0, xmm1, xmm2);
__ vfmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd213sd(xmm0, xmm1, xmm2);
__ vfmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231sd(xmm0, xmm1, xmm2);
__ vfmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd132sd(xmm9, xmm10, xmm11);
__ vfmadd132sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmadd213sd(xmm9, xmm10, xmm11);
__ vfmadd213sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmadd231sd(xmm9, xmm10, xmm11);
__ vfmadd231sd(xmm9, xmm10, Operand(r9, r11, times_4, 10000));
__ vfmsub132sd(xmm0, xmm1, xmm2);
__ vfmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub213sd(xmm0, xmm1, xmm2);
__ vfmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub231sd(xmm0, xmm1, xmm2);
__ vfmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd132sd(xmm0, xmm1, xmm2);
__ vfnmadd132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd213sd(xmm0, xmm1, xmm2);
__ vfnmadd213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231sd(xmm0, xmm1, xmm2);
__ vfnmadd231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub132sd(xmm0, xmm1, xmm2);
__ vfnmsub132sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub213sd(xmm0, xmm1, xmm2);
__ vfnmsub213sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub231sd(xmm0, xmm1, xmm2);
__ vfnmsub231sd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd132ss(xmm0, xmm1, xmm2);
__ vfmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd213ss(xmm0, xmm1, xmm2);
__ vfmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231ss(xmm0, xmm1, xmm2);
__ vfmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub132ss(xmm0, xmm1, xmm2);
__ vfmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub213ss(xmm0, xmm1, xmm2);
__ vfmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmsub231ss(xmm0, xmm1, xmm2);
__ vfmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd132ss(xmm0, xmm1, xmm2);
__ vfnmadd132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd213ss(xmm0, xmm1, xmm2);
__ vfnmadd213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231ss(xmm0, xmm1, xmm2);
__ vfnmadd231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub132ss(xmm0, xmm1, xmm2);
__ vfnmsub132ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub213ss(xmm0, xmm1, xmm2);
__ vfnmsub213ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmsub231ss(xmm0, xmm1, xmm2);
__ vfnmsub231ss(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231ps(xmm0, xmm1, xmm2);
__ vfmadd231ps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231ps(xmm0, xmm1, xmm2);
__ vfnmadd231ps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfmadd231pd(xmm0, xmm1, xmm2);
__ vfmadd231pd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vfnmadd231pd(xmm0, xmm1, xmm2);
__ vfnmadd231pd(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
// Assemble every FMA_INSTRUCTION_LIST entry twice: once with an XMM register
// as the last operand and once with a memory operand. Only the mnemonic
// column is needed here, so the encoding columns are absorbed by the
// variadic tail.
#define EMIT_FMA(instr, ...)    \
  __ instr(xmm9, xmm10, xmm11); \
  __ instr(xmm9, xmm10, Operand(rbx, rcx, times_4, 10000));
      FMA_INSTRUCTION_LIST(EMIT_FMA)
#undef EMIT_FMA
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment