Commit 78967fbb authored by Milad Fa, committed by V8 LUCI CQ

PPC: Introduce Power10 prefixed store instructions

This CL adds prefixed store scalar and floating-point
instructions to the assembler and uses them during code generation
if the processor supports them. They have also been added to the
disassembler and the simulator.

Change-Id: I0b9e0758f17ca6b86d4f2f2bb36be87fba14ecb7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3626173
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#80381}
parent b3347578
...@@ -1256,6 +1256,60 @@ void Assembler::plfd(DoubleRegister dst, const MemOperand& src) { ...@@ -1256,6 +1256,60 @@ void Assembler::plfd(DoubleRegister dst, const MemOperand& src) {
pload_store_mls(Operand(hi)); pload_store_mls(Operand(hi));
lfd(dst, MemOperand(src.ra(), lo)); lfd(dst, MemOperand(src.ra(), lo));
} }
// Power10 prefixed store byte: emits the MLS prefix word carrying the high
// offset bits, immediately followed by a plain stb with the low 16 bits.
void Assembler::pstb(Register src, const MemOperand& dst) {
  // r0 as a base reads as literal zero in PPC addressing, so it is rejected.
  DCHECK(dst.ra_ != r0);
  int64_t displacement = dst.offset();
  GENERATE_PREFIX_SUFFIX_BITS(displacement, upper_bits, lower_bits)
  // The prefix and suffix instructions must stay adjacent in the stream.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  pload_store_mls(Operand(upper_bits));
  stb(src, MemOperand(dst.ra(), lower_bits));
}
// Power10 prefixed store halfword: MLS prefix (high offset bits) followed
// directly by sth carrying the low 16 offset bits.
void Assembler::psth(Register src, const MemOperand& dst) {
  // r0 as a base reads as literal zero in PPC addressing, so it is rejected.
  DCHECK(dst.ra_ != r0);
  int64_t displacement = dst.offset();
  GENERATE_PREFIX_SUFFIX_BITS(displacement, upper_bits, lower_bits)
  // Keep the prefix/suffix pair adjacent in the instruction stream.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  pload_store_mls(Operand(upper_bits));
  sth(src, MemOperand(dst.ra(), lower_bits));
}
// Power10 prefixed store word: MLS prefix (high offset bits) followed
// directly by stw carrying the low 16 offset bits.
void Assembler::pstw(Register src, const MemOperand& dst) {
  // r0 as a base reads as literal zero in PPC addressing, so it is rejected.
  DCHECK(dst.ra_ != r0);
  int64_t displacement = dst.offset();
  GENERATE_PREFIX_SUFFIX_BITS(displacement, upper_bits, lower_bits)
  // Keep the prefix/suffix pair adjacent in the instruction stream.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  pload_store_mls(Operand(upper_bits));
  stw(src, MemOperand(dst.ra(), lower_bits));
}
// Power10 prefixed store doubleword: stores the 64-bit contents of `src` at
// dst.ra() + dst.offset(), where the offset is a wide immediate split across
// an 8LS prefix word and a PPSTD suffix word.
void Assembler::pstd(Register src, const MemOperand& dst) {
  // r0 may not be the base: in PPC addressing ra == 0 reads as literal zero
  // rather than the register's contents.
  DCHECK(dst.ra_ != r0);
  int64_t offset = dst.offset();
  // Split the offset into the prefix's high bits (hi) and the suffix
  // instruction's 16-bit displacement field (lo).
  GENERATE_PREFIX_SUFFIX_BITS(offset, hi, lo)
  // The prefix and suffix must stay adjacent; block trampoline-pool
  // insertion between the two emitted words.
  BlockTrampolinePoolScope block_trampoline_pool(this);
  pload_store_8ls(Operand(hi));
  // Suffix word: PPSTD opcode | RS (bit 21) | RA (bit 16) | low 16 offset
  // bits.
  emit(PPSTD | src.code() * B21 | dst.ra().code() * B16 | (lo & kImm16Mask));
}
void Assembler::pstfs(const DoubleRegister src, const MemOperand& dst) {
DCHECK(dst.ra_ != r0);
int64_t offset = dst.offset();
GENERATE_PREFIX_SUFFIX_BITS(offset, hi, lo)
BlockTrampolinePoolScope block_trampoline_pool(this);
pload_store_mls(Operand(hi));
stfs(src, MemOperand(dst.ra(), lo));
}
void Assembler::pstfd(const DoubleRegister src, const MemOperand& dst) {
DCHECK(dst.ra_ != r0);
int64_t offset = dst.offset();
GENERATE_PREFIX_SUFFIX_BITS(offset, hi, lo)
BlockTrampolinePoolScope block_trampoline_pool(this);
pload_store_mls(Operand(hi));
stfd(src, MemOperand(dst.ra(), lo));
}
#undef GENERATE_PREFIX_SUFFIX_BITS #undef GENERATE_PREFIX_SUFFIX_BITS
int Assembler::instructions_required_for_mov(Register dst, int Assembler::instructions_required_for_mov(Register dst,
......
...@@ -1147,6 +1147,12 @@ class Assembler : public AssemblerBase { ...@@ -1147,6 +1147,12 @@ class Assembler : public AssemblerBase {
void pld(Register dst, const MemOperand& src); void pld(Register dst, const MemOperand& src);
void plfs(DoubleRegister dst, const MemOperand& src); void plfs(DoubleRegister dst, const MemOperand& src);
void plfd(DoubleRegister dst, const MemOperand& src); void plfd(DoubleRegister dst, const MemOperand& src);
void pstb(Register src, const MemOperand& dst);
void psth(Register src, const MemOperand& dst);
void pstw(Register src, const MemOperand& dst);
void pstd(Register src, const MemOperand& dst);
void pstfs(const DoubleRegister src, const MemOperand& dst);
void pstfd(const DoubleRegister src, const MemOperand& dst);
// Pseudo instructions // Pseudo instructions
......
...@@ -2681,7 +2681,8 @@ immediate-specified index */ \ ...@@ -2681,7 +2681,8 @@ immediate-specified index */ \
#define PPC_PREFIX_OPCODE_TYPE_00_LIST(V) \ #define PPC_PREFIX_OPCODE_TYPE_00_LIST(V) \
V(pload_store_8ls, PLOAD_STORE_8LS, 0x4000000) \ V(pload_store_8ls, PLOAD_STORE_8LS, 0x4000000) \
V(pplwa, PPLWA, 0xA4000000) \ V(pplwa, PPLWA, 0xA4000000) \
V(ppld, PPLD, 0xE4000000) V(ppld, PPLD, 0xE4000000) \
V(ppstd, PPSTD, 0xF4000000)
#define PPC_PREFIX_OPCODE_TYPE_10_LIST(V) \ #define PPC_PREFIX_OPCODE_TYPE_10_LIST(V) \
V(pload_store_mls, PLOAD_STORE_MLS, 0x6000000) V(pload_store_mls, PLOAD_STORE_MLS, 0x6000000)
......
...@@ -595,6 +595,14 @@ void TurboAssembler::StoreTaggedField(const Register& value, ...@@ -595,6 +595,14 @@ void TurboAssembler::StoreTaggedField(const Register& value,
StoreU32(value, dst_field_operand, scratch); StoreU32(value, dst_field_operand, scratch);
RecordComment("]"); RecordComment("]");
} else { } else {
// TODO(miladfarca): move this block into StoreU64.
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
if (dst_field_operand.rb() == no_reg &&
is_int34(dst_field_operand.offset())) {
pstd(value, dst_field_operand);
return;
}
}
StoreU64(value, dst_field_operand, scratch); StoreU64(value, dst_field_operand, scratch);
} }
} }
......
...@@ -504,40 +504,48 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) { ...@@ -504,40 +504,48 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) {
DCHECK_EQ(LeaveRC, i.OutputRCBit()); \ DCHECK_EQ(LeaveRC, i.OutputRCBit()); \
} while (0) } while (0)
#define ASSEMBLE_STORE_FLOAT(asm_instr, asm_instrx) \ #define ASSEMBLE_STORE_FLOAT(asm_instr, asm_instrp, asm_instrx) \
do { \ do { \
size_t index = 0; \ size_t index = 0; \
AddressingMode mode = kMode_None; \ AddressingMode mode = kMode_None; \
MemOperand operand = i.MemoryOperand(&mode, &index); \ MemOperand operand = i.MemoryOperand(&mode, &index); \
DoubleRegister value = i.InputDoubleRegister(index); \ DoubleRegister value = i.InputDoubleRegister(index); \
bool is_atomic = i.InputInt32(3); \ bool is_atomic = i.InputInt32(3); \
if (is_atomic) __ lwsync(); \ if (is_atomic) __ lwsync(); \
/* removed frsp as instruction-selector checked */ \ /* removed frsp as instruction-selector checked */ \
/* value to be kFloat32 */ \ /* value to be kFloat32 */ \
if (mode == kMode_MRI) { \ if (mode == kMode_MRI) { \
__ asm_instr(value, operand); \ if (CpuFeatures::IsSupported(PPC_10_PLUS)) { \
} else { \ __ asm_instrp(value, operand); \
__ asm_instrx(value, operand); \ } else { \
} \ __ asm_instr(value, operand); \
if (is_atomic) __ sync(); \ } \
DCHECK_EQ(LeaveRC, i.OutputRCBit()); \ } else { \
__ asm_instrx(value, operand); \
} \
if (is_atomic) __ sync(); \
DCHECK_EQ(LeaveRC, i.OutputRCBit()); \
} while (0) } while (0)
#define ASSEMBLE_STORE_INTEGER(asm_instr, asm_instrx) \ #define ASSEMBLE_STORE_INTEGER(asm_instr, asm_instrp, asm_instrx) \
do { \ do { \
size_t index = 0; \ size_t index = 0; \
AddressingMode mode = kMode_None; \ AddressingMode mode = kMode_None; \
MemOperand operand = i.MemoryOperand(&mode, &index); \ MemOperand operand = i.MemoryOperand(&mode, &index); \
Register value = i.InputRegister(index); \ Register value = i.InputRegister(index); \
bool is_atomic = i.InputInt32(3); \ bool is_atomic = i.InputInt32(3); \
if (is_atomic) __ lwsync(); \ if (is_atomic) __ lwsync(); \
if (mode == kMode_MRI) { \ if (mode == kMode_MRI) { \
__ asm_instr(value, operand); \ if (CpuFeatures::IsSupported(PPC_10_PLUS)) { \
} else { \ __ asm_instrp(value, operand); \
__ asm_instrx(value, operand); \ } else { \
} \ __ asm_instr(value, operand); \
if (is_atomic) __ sync(); \ } \
DCHECK_EQ(LeaveRC, i.OutputRCBit()); \ } else { \
__ asm_instrx(value, operand); \
} \
if (is_atomic) __ sync(); \
DCHECK_EQ(LeaveRC, i.OutputRCBit()); \
} while (0) } while (0)
#define ASSEMBLE_STORE_INTEGER_RR(asm_instr) \ #define ASSEMBLE_STORE_INTEGER_RR(asm_instr) \
...@@ -2004,24 +2012,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2004,24 +2012,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kPPC_StoreWord8: case kPPC_StoreWord8:
ASSEMBLE_STORE_INTEGER(stb, stbx); ASSEMBLE_STORE_INTEGER(stb, pstb, stbx);
break; break;
case kPPC_StoreWord16: case kPPC_StoreWord16:
ASSEMBLE_STORE_INTEGER(sth, sthx); ASSEMBLE_STORE_INTEGER(sth, psth, sthx);
break; break;
case kPPC_StoreWord32: case kPPC_StoreWord32:
ASSEMBLE_STORE_INTEGER(stw, stwx); ASSEMBLE_STORE_INTEGER(stw, pstw, stwx);
break; break;
#if V8_TARGET_ARCH_PPC64 #if V8_TARGET_ARCH_PPC64
case kPPC_StoreWord64: case kPPC_StoreWord64:
ASSEMBLE_STORE_INTEGER(std, stdx); ASSEMBLE_STORE_INTEGER(std, pstd, stdx);
break; break;
#endif #endif
case kPPC_StoreFloat32: case kPPC_StoreFloat32:
ASSEMBLE_STORE_FLOAT(stfs, stfsx); ASSEMBLE_STORE_FLOAT(stfs, pstfs, stfsx);
break; break;
case kPPC_StoreDouble: case kPPC_StoreDouble:
ASSEMBLE_STORE_FLOAT(stfd, stfdx); ASSEMBLE_STORE_FLOAT(stfd, pstfd, stfdx);
break; break;
case kPPC_StoreSimd128: { case kPPC_StoreSimd128: {
size_t index = 0; size_t index = 0;
...@@ -3769,7 +3777,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3769,7 +3777,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kPPC_StoreCompressTagged: { case kPPC_StoreCompressTagged: {
ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedField); ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedField,
StoreTaggedField);
break; break;
} }
case kPPC_LoadDecompressTaggedSigned: { case kPPC_LoadDecompressTaggedSigned: {
......
...@@ -322,7 +322,12 @@ void VisitStoreCommon(InstructionSelector* selector, Node* node, ...@@ -322,7 +322,12 @@ void VisitStoreCommon(InstructionSelector* selector, Node* node,
selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps); selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
} else { } else {
ArchOpcode opcode; ArchOpcode opcode;
ImmediateMode mode = kInt16Imm; ImmediateMode mode;
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
mode = kInt34Imm;
} else {
mode = kInt16Imm;
}
NodeMatcher m(value); NodeMatcher m(value);
switch (rep) { switch (rep) {
case MachineRepresentation::kFloat32: case MachineRepresentation::kFloat32:
...@@ -358,12 +363,12 @@ void VisitStoreCommon(InstructionSelector* selector, Node* node, ...@@ -358,12 +363,12 @@ void VisitStoreCommon(InstructionSelector* selector, Node* node,
case MachineRepresentation::kTaggedSigned: // Fall through. case MachineRepresentation::kTaggedSigned: // Fall through.
case MachineRepresentation::kTaggedPointer: // Fall through. case MachineRepresentation::kTaggedPointer: // Fall through.
case MachineRepresentation::kTagged: case MachineRepresentation::kTagged:
mode = kInt16Imm_4ByteAligned; if (mode != kInt34Imm) mode = kInt16Imm_4ByteAligned;
opcode = kPPC_StoreCompressTagged; opcode = kPPC_StoreCompressTagged;
break; break;
case MachineRepresentation::kWord64: case MachineRepresentation::kWord64:
opcode = kPPC_StoreWord64; opcode = kPPC_StoreWord64;
mode = kInt16Imm_4ByteAligned; if (mode != kInt34Imm) mode = kInt16Imm_4ByteAligned;
if (m.IsWord64ReverseBytes()) { if (m.IsWord64ReverseBytes()) {
opcode = kPPC_StoreByteRev64; opcode = kPPC_StoreByteRev64;
value = value->InputAt(0); value = value->InputAt(0);
......
...@@ -514,7 +514,7 @@ void Decoder::DecodeExtP(Instruction* instr) { ...@@ -514,7 +514,7 @@ void Decoder::DecodeExtP(Instruction* instr) {
} }
// Prefixed LD. // Prefixed LD.
case PPLD: { case PPLD: {
Format(next_instr, "pld 'rt, 'int34('ra)"); Format(next_instr, "pld 'rt, 'int34('ra)");
break; break;
} }
// Prefixed LFS. // Prefixed LFS.
...@@ -526,6 +526,36 @@ void Decoder::DecodeExtP(Instruction* instr) { ...@@ -526,6 +526,36 @@ void Decoder::DecodeExtP(Instruction* instr) {
case LFD: { case LFD: {
Format(next_instr, "plfd 'Dt, 'int34('ra)"); Format(next_instr, "plfd 'Dt, 'int34('ra)");
break; break;
}
// Prefixed STB.
case STB: {
Format(next_instr, "pstb 'rs, 'int34('ra)");
break;
}
// Prefixed STH.
case STH: {
Format(next_instr, "psth 'rs, 'int34('ra)");
break;
}
// Prefixed STW.
case STW: {
Format(next_instr, "pstw 'rs, 'int34('ra)");
break;
}
// Prefixed STD.
case PPSTD: {
Format(next_instr, "pstd 'rs, 'int34('ra)");
break;
}
// Prefixed STFS.
case STFS: {
Format(next_instr, "pstfs 'Dt, 'int34('ra)");
break;
}
// Prefixed STFD.
case STFD: {
Format(next_instr, "pstfd 'Dt, 'int34('ra)");
break;
} }
default: { default: {
Unknown(instr); Unknown(instr);
......
...@@ -1651,6 +1651,71 @@ void Simulator::ExecuteGeneric(Instruction* instr) { ...@@ -1651,6 +1651,71 @@ void Simulator::ExecuteGeneric(Instruction* instr) {
set_d_register(frt, dptr); set_d_register(frt, dptr);
break; break;
} }
// Prefixed STB.
case STB: {
int ra = next_instr->RAValue();
int rs = next_instr->RSValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
WriteB(ra_val + im_val, get_register(rs));
break;
}
// Prefixed STH.
case STH: {
int ra = next_instr->RAValue();
int rs = next_instr->RSValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
WriteH(ra_val + im_val, get_register(rs));
break;
}
// Prefixed STW.
case STW: {
int ra = next_instr->RAValue();
int rs = next_instr->RSValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
WriteW(ra_val + im_val, get_register(rs));
break;
}
// Prefixed STD.
case PPSTD: {
int ra = next_instr->RAValue();
int rs = next_instr->RSValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
WriteDW(ra_val + im_val, get_register(rs));
break;
}
// Prefixed STFS.
case STFS: {
int frs = next_instr->RSValue();
int ra = next_instr->RAValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
float frs_val = static_cast<float>(get_double_from_d_register(frs));
int32_t* p;
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
// Conversion using double changes sNan to qNan on ia32/x64
int32_t sval = 0;
int64_t dval = get_d_register(frs);
if ((dval & 0x7FF0000000000000) == 0x7FF0000000000000) {
sval = ((dval & 0xC000000000000000) >> 32) |
((dval & 0x07FFFFFFE0000000) >> 29);
p = &sval;
} else {
p = reinterpret_cast<int32_t*>(&frs_val);
}
#else
p = reinterpret_cast<int32_t*>(&frs_val);
#endif
WriteW(ra_val + im_val, *p);
break;
}
// Prefixed STFD.
case STFD: {
int frs = next_instr->RSValue();
int ra = next_instr->RAValue();
intptr_t ra_val = ra == 0 ? 0 : get_register(ra);
int64_t frs_val = get_d_register(frs);
WriteDW(ra_val + im_val, frs_val);
break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment