Commit 701790fd authored by erik.corry@gmail.com

Support multi-byte nop instructions as recommended by the optimization
guides from AMD and Intel.
Review URL: http://codereview.chromium.org/8776033

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10134 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent d5fdb760
......@@ -388,8 +388,91 @@ void Assembler::GetCode(CodeDesc* desc) {
// Pads the instruction stream with nops until pc_offset() is a multiple of
// m. m must be a power of 2.
void Assembler::Align(int m) {
  ASSERT(IsPowerOf2(m));
  // Number of bytes up to the next multiple of m. The outer mask maps a
  // distance of exactly m (i.e. already aligned) to 0, so nothing is emitted
  // in that case.
  int mask = m - 1;
  int addr = pc_offset();
  Nop((m - (addr & mask)) & mask);
}
// Returns true if the bytes at addr form one of the nop encodings produced
// by Assembler::Nop: an optional run of 0x66 prefixes followed by either the
// single-byte nop (0x90) or the 0x0f 0x1f multi-byte nop opcode.
bool Assembler::IsNop(Address addr) {
  // Step over any leading 0x66 prefix bytes.
  Address cursor = addr;
  for (; *cursor == 0x66; cursor++) {
  }
  // The second opcode byte is only inspected when the first one is 0x0f.
  return cursor[0] == 0x90 || (cursor[0] == 0xf && cursor[1] == 0x1f);
}
// Emits exactly |bytes| bytes of nop padding. A count of zero or less emits
// nothing.
void Assembler::Nop(int bytes) {
  EnsureSpace ensure_space(this);

  if (!CpuFeatures::IsSupported(SSE2)) {
    // Older CPUs that do not support SSE2 may not support multibyte NOP
    // instructions, so pad with single-byte nops only.
    while (bytes > 0) {
      EMIT(0x90);
      bytes--;
    }
    return;
  }

  // Multi byte nops from http://support.amd.com/us/Processor_TechDocs/40546.pdf
  while (bytes > 0) {
    // A single pass covers at most 11 bytes; any remainder is handled by the
    // following passes.
    const int chunk = (bytes < 11) ? bytes : 11;
    bytes -= chunk;

    // Length of the encoding once its leading 0x66 prefixes are removed.
    // Lengths 2, 6 and 9..11 are the 1, 5 and 8 byte forms with prefixes.
    int unprefixed = chunk;
    if (chunk >= 8) {
      unprefixed = 8;
    } else if (chunk == 6) {
      unprefixed = 5;
    } else if (chunk == 2) {
      unprefixed = 1;
    }

    for (int prefixes = chunk - unprefixed; prefixes > 0; prefixes--) {
      EMIT(0x66);
    }

    if (unprefixed == 1) {
      EMIT(0x90);
      continue;
    }

    // Every longer form is 0x0f 0x1f, one length-specific byte, then zeros.
    EMIT(0xf);
    EMIT(0x1f);
    int trailing_zeros = 0;
    switch (unprefixed) {
      case 3:
        EMIT(0);
        break;
      case 4:
        EMIT(0x40);
        trailing_zeros = 1;
        break;
      case 5:
        EMIT(0x44);
        trailing_zeros = 2;
        break;
      case 7:
        EMIT(0x80);
        trailing_zeros = 4;
        break;
      default:  // 8 byte form.
        EMIT(0x84);
        trailing_zeros = 5;
        break;
    }
    for (; trailing_zeros > 0; trailing_zeros--) {
      EMIT(0);
    }
  }
}
......
......@@ -659,6 +659,7 @@ class Assembler : public AssemblerBase {
// possible to align the pc offset to a multiple
// of m. m must be a power of 2.
void Align(int m);
void Nop(int bytes = 1);
// Aligns code to something that's optimal for a jump target for the platform.
void CodeTargetAlign();
......@@ -1084,7 +1085,7 @@ class Assembler : public AssemblerBase {
// Get the number of bytes available in the buffer.
inline int available_space() const { return reloc_info_writer.pos() - pc_; }
static bool IsNop(Address addr) { return *addr == 0x90; }
static bool IsNop(Address addr);
PositionsRecorder* positions_recorder() { return &positions_recorder_; }
......
......@@ -258,9 +258,7 @@ void Debug::GenerateSlot(MacroAssembler* masm) {
Label check_codesize;
__ bind(&check_codesize);
__ RecordDebugBreakSlot();
for (int i = 0; i < Assembler::kDebugBreakSlotLength; i++) {
__ nop();
}
__ Nop(Assembler::kDebugBreakSlotLength);
ASSERT_EQ(Assembler::kDebugBreakSlotLength,
masm->SizeOfCodeGeneratedSince(&check_codesize));
}
......
......@@ -231,8 +231,8 @@ void Deoptimizer::PatchStackCheckCodeAt(Code* unoptimized_code,
ASSERT(*(call_target_address - 3) == 0x73 && // jae
*(call_target_address - 2) == 0x07 && // offset
*(call_target_address - 1) == 0xe8); // call
*(call_target_address - 3) = 0x90; // nop
*(call_target_address - 2) = 0x90; // nop
*(call_target_address - 3) = 0x66; // 2 byte nop part 1
*(call_target_address - 2) = 0x90; // 2 byte nop part 2
Assembler::set_target_address_at(call_target_address,
replacement_code->entry());
......@@ -250,8 +250,8 @@ void Deoptimizer::RevertStackCheckCodeAt(Code* unoptimized_code,
Assembler::target_address_at(call_target_address));
// Replace the nops from patching (Deoptimizer::PatchStackCheckCode) to
// restore the conditional branch.
ASSERT(*(call_target_address - 3) == 0x90 && // nop
*(call_target_address - 2) == 0x90 && // nop
ASSERT(*(call_target_address - 3) == 0x66 && // 2 byte nop part 1
*(call_target_address - 2) == 0x90 && // 2 byte nop part 2
*(call_target_address - 1) == 0xe8); // call
*(call_target_address - 3) = 0x73; // jae
*(call_target_address - 2) = 0x07; // offset
......
......@@ -992,7 +992,7 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
break;
case 0x0F:
{ byte f0byte = *(data+1);
{ byte f0byte = data[1];
const char* f0mnem = F0Mnem(f0byte);
if (f0byte == 0x18) {
int mod, regop, rm;
......@@ -1000,6 +1000,25 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
const char* suffix[] = {"nta", "1", "2", "3"};
AppendToBuffer("%s%s ", f0mnem, suffix[regop & 0x03]);
data += PrintRightOperand(data);
} else if (f0byte == 0x1F && data[2] == 0) {
AppendToBuffer("nop"); // 3 byte nop.
data += 3;
} else if (f0byte == 0x1F && data[2] == 0x40 && data[3] == 0) {
AppendToBuffer("nop"); // 4 byte nop.
data += 4;
} else if (f0byte == 0x1F && data[2] == 0x44 && data[3] == 0 &&
data[4] == 0) {
AppendToBuffer("nop"); // 5 byte nop.
data += 5;
} else if (f0byte == 0x1F && data[2] == 0x80 && data[3] == 0 &&
data[4] == 0 && data[5] == 0 && data[6] == 0) {
AppendToBuffer("nop"); // 7 byte nop.
data += 7;
} else if (f0byte == 0x1F && data[2] == 0x84 && data[3] == 0 &&
data[4] == 0 && data[5] == 0 && data[6] == 0 &&
data[7] == 0) {
AppendToBuffer("nop"); // 8 byte nop.
data += 8;
} else if (f0byte == 0xA2 || f0byte == 0x31) {
AppendToBuffer("%s", f0mnem);
data += 2;
......@@ -1135,8 +1154,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
break;
case 0x66: // prefix
data++;
if (*data == 0x8B) {
while (*data == 0x66) data++;
if (*data == 0xf && data[1] == 0x1f) {
AppendToBuffer("nop"); // 0x66 prefix
} else if (*data == 0x90) {
AppendToBuffer("nop"); // 0x66 prefix
} else if (*data == 0x8B) {
data++;
data += PrintOperands("mov_w", REG_OPER_OP_ORDER, data);
} else if (*data == 0x89) {
......@@ -1273,6 +1296,9 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x90) {
data++;
AppendToBuffer("nop"); // 2 byte nop.
} else if (*data == 0xF3) {
data++;
int mod, regop, rm;
......
......@@ -4458,9 +4458,7 @@ void LCodeGen::EnsureSpaceForLazyDeopt() {
int patch_size = Deoptimizer::patch_size();
if (current_pc < last_lazy_deopt_pc_ + patch_size) {
int padding_size = last_lazy_deopt_pc_ + patch_size - current_pc;
while (padding_size-- > 0) {
__ nop();
}
__ Nop(padding_size);
}
last_lazy_deopt_pc_ = masm()->pc_offset();
}
......
......@@ -408,4 +408,72 @@ TEST(AssemblerIa3210) {
__ nop();
}
// Checks that Assembler::Nop(i) emits exactly i bytes for every i in
// [0, 16), and that running the generated padding leaves the register
// values loaded beforehand unchanged.
TEST(AssemblerMultiByteNop) {
InitializeVM();
v8::HandleScope scope;
v8::internal::byte buffer[1024];
Assembler assm(Isolate::Current(), buffer, sizeof(buffer));
// Save the registers that are overwritten below; they are popped again
// (in reverse order) before each ret.
__ push(ebx);
__ push(ecx);
__ push(edx);
__ push(edi);
__ push(esi);
// Give every register a distinct known value.
__ mov(eax, 1);
__ mov(ebx, 2);
__ mov(ecx, 3);
__ mov(edx, 4);
__ mov(edi, 5);
__ mov(esi, 6);
// Emit one nop sequence of each length 0..15 and verify at assembly time
// that the pc offset advanced by exactly the requested number of bytes.
for (int i = 0; i < 16; i++) {
int before = assm.pc_offset();
__ Nop(i);
CHECK_EQ(assm.pc_offset() - before, i);
}
// At run time, branch to the failure path if any register no longer holds
// the value loaded above.
Label fail;
__ cmp(eax, 1);
__ j(not_equal, &fail);
__ cmp(ebx, 2);
__ j(not_equal, &fail);
__ cmp(ecx, 3);
__ j(not_equal, &fail);
__ cmp(edx, 4);
__ j(not_equal, &fail);
__ cmp(edi, 5);
__ j(not_equal, &fail);
__ cmp(esi, 6);
__ j(not_equal, &fail);
// Success path: leave 42 in eax (the value the final CHECK_EQ expects),
// restore the saved registers and return.
__ mov(eax, 42);
__ pop(esi);
__ pop(edi);
__ pop(edx);
__ pop(ecx);
__ pop(ebx);
__ ret(0);
// Failure path: same epilogue, but with 13 in eax.
__ bind(&fail);
__ mov(eax, 13);
__ pop(esi);
__ pop(edi);
__ pop(edx);
__ pop(ecx);
__ pop(ebx);
__ ret(0);
// Wrap the assembled bytes in a Code object, call it, and check the result.
CodeDesc desc;
assm.GetCode(&desc);
Code* code = Code::cast(HEAP->CreateCode(
desc,
Code::ComputeFlags(Code::STUB),
Handle<Object>(HEAP->undefined_value()))->ToObjectChecked());
CHECK(code->IsCode());
F0 f = FUNCTION_CAST<F0>(code->entry());
int res = f();
CHECK_EQ(42, res);
}
#undef __
......@@ -449,6 +449,11 @@ TEST(DisasmIa320) {
}
}
// Nop instructions
for (int i = 0; i < 16; i++) {
__ Nop(i);
}
__ ret(0);
CodeDesc desc;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment