Commit 0ddb3645 authored by joransiu, committed by Commit bot

S390: Optimize allocate sequence

Improve the S390 allocate sequence by:
- Keeping the allocation limit in memory and leveraging compare RX-instructions.
- Prefetching subsequent cache lines from allocation top.
- Optimizing object tagging with LA.
- Optimizing the increment for Fast-Allocate with ASI/AGSI.

R=jyan@ca.ibm.com, michael_dawson@ca.ibm.com, bjaideep@ca.ibm.com
BUG=

Review-Url: https://codereview.chromium.org/2601563002
Cr-Commit-Position: refs/heads/master@{#41933}
parent 61a55548
...@@ -1337,6 +1337,7 @@ SI_FORM_EMIT(ni, NI) ...@@ -1337,6 +1337,7 @@ SI_FORM_EMIT(ni, NI)
RI1_FORM_EMIT(nilh, NILH) RI1_FORM_EMIT(nilh, NILH)
RI1_FORM_EMIT(nill, NILL) RI1_FORM_EMIT(nill, NILL)
RI1_FORM_EMIT(oill, OILL) RI1_FORM_EMIT(oill, OILL)
RXY_FORM_EMIT(pfd, PFD)
RXY_FORM_EMIT(slgf, SLGF) RXY_FORM_EMIT(slgf, SLGF)
RXY_FORM_EMIT(strvh, STRVH) RXY_FORM_EMIT(strvh, STRVH)
RXY_FORM_EMIT(strv, STRV) RXY_FORM_EMIT(strv, STRV)
......
...@@ -987,6 +987,7 @@ class Assembler : public AssemblerBase { ...@@ -987,6 +987,7 @@ class Assembler : public AssemblerBase {
RI1_FORM(nilh); RI1_FORM(nilh);
RI1_FORM(nill); RI1_FORM(nill);
RI1_FORM(oill); RI1_FORM(oill);
RXY_FORM(pfd);
RXE_FORM(sdb); RXE_FORM(sdb);
RXY_FORM(slgf); RXY_FORM(slgf);
RS1_FORM(srdl); RS1_FORM(srdl);
......
...@@ -1415,6 +1415,9 @@ bool Decoder::DecodeSixByte(Instruction* instr) { ...@@ -1415,6 +1415,9 @@ bool Decoder::DecodeSixByte(Instruction* instr) {
case SQDB: case SQDB:
Format(instr, "sqdb\t'r1,'d1('r2d, 'r3)"); Format(instr, "sqdb\t'r1,'d1('r2d, 'r3)");
break; break;
case PFD:
Format(instr, "pfd\t'm1,'d2('r2d,'r3)");
break;
default: default:
return false; return false;
} }
......
...@@ -1579,25 +1579,18 @@ void MacroAssembler::Allocate(int object_size, Register result, ...@@ -1579,25 +1579,18 @@ void MacroAssembler::Allocate(int object_size, Register result,
// Set up allocation top address register. // Set up allocation top address register.
Register top_address = scratch1; Register top_address = scratch1;
// This code stores a temporary value in ip. This is OK, as the code below
// does not need ip for implicit literal generation.
Register alloc_limit = ip;
Register result_end = scratch2; Register result_end = scratch2;
mov(top_address, Operand(allocation_top)); mov(top_address, Operand(allocation_top));
if ((flags & RESULT_CONTAINS_TOP) == 0) { if ((flags & RESULT_CONTAINS_TOP) == 0) {
// Load allocation top into result and allocation limit into ip. // Load allocation top into result.
LoadP(result, MemOperand(top_address)); LoadP(result, MemOperand(top_address));
LoadP(alloc_limit, MemOperand(top_address, kPointerSize));
} else { } else {
if (emit_debug_code()) { if (emit_debug_code()) {
// Assert that result actually contains top on entry. // Assert that result actually contains top on entry.
LoadP(alloc_limit, MemOperand(top_address)); CmpP(result, MemOperand(top_address));
CmpP(result, alloc_limit);
Check(eq, kUnexpectedAllocationTop); Check(eq, kUnexpectedAllocationTop);
} }
// Load allocation limit. Result already contains allocation top.
LoadP(alloc_limit, MemOperand(top_address, limit - top));
} }
if ((flags & DOUBLE_ALIGNMENT) != 0) { if ((flags & DOUBLE_ALIGNMENT) != 0) {
...@@ -1611,7 +1604,7 @@ void MacroAssembler::Allocate(int object_size, Register result, ...@@ -1611,7 +1604,7 @@ void MacroAssembler::Allocate(int object_size, Register result,
Label aligned; Label aligned;
beq(&aligned, Label::kNear); beq(&aligned, Label::kNear);
if ((flags & PRETENURE) != 0) { if ((flags & PRETENURE) != 0) {
CmpLogicalP(result, alloc_limit); CmpLogicalP(result, MemOperand(top_address, limit - top));
bge(gc_required); bge(gc_required);
} }
mov(result_end, Operand(isolate()->factory()->one_pointer_filler_map())); mov(result_end, Operand(isolate()->factory()->one_pointer_filler_map()));
...@@ -1621,27 +1614,24 @@ void MacroAssembler::Allocate(int object_size, Register result, ...@@ -1621,27 +1614,24 @@ void MacroAssembler::Allocate(int object_size, Register result,
#endif #endif
} }
// Calculate new top and bail out if new space is exhausted. Use result AddP(result_end, result, Operand(object_size));
// to calculate the new top.
SubP(r0, alloc_limit, result); // Compare with allocation limit.
if (is_int16(object_size)) { CmpLogicalP(result_end, MemOperand(top_address, limit - top));
CmpP(r0, Operand(object_size)); bge(gc_required);
blt(gc_required);
AddP(result_end, result, Operand(object_size));
} else {
mov(result_end, Operand(object_size));
CmpP(r0, result_end);
blt(gc_required);
AddP(result_end, result, result_end);
}
if ((flags & ALLOCATION_FOLDING_DOMINATOR) == 0) { if ((flags & ALLOCATION_FOLDING_DOMINATOR) == 0) {
// The top pointer is not updated for allocation folding dominators. // The top pointer is not updated for allocation folding dominators.
StoreP(result_end, MemOperand(top_address)); StoreP(result_end, MemOperand(top_address));
} }
// Prefetch the allocation_top's next cache line in advance to
// help alleviate potential cache misses.
// Mode 2 - Prefetch the data into a cache line for store access.
pfd(r2, MemOperand(result, 256));
// Tag object. // Tag object.
AddP(result, result, Operand(kHeapObjectTag)); la(result, MemOperand(result, kHeapObjectTag));
} }
void MacroAssembler::Allocate(Register object_size, Register result, void MacroAssembler::Allocate(Register object_size, Register result,
...@@ -1676,24 +1666,17 @@ void MacroAssembler::Allocate(Register object_size, Register result, ...@@ -1676,24 +1666,17 @@ void MacroAssembler::Allocate(Register object_size, Register result,
// Set up allocation top address and allocation limit registers. // Set up allocation top address and allocation limit registers.
Register top_address = scratch; Register top_address = scratch;
// This code stores a temporary value in ip. This is OK, as the code below
// does not need ip for implicit literal generation.
Register alloc_limit = ip;
mov(top_address, Operand(allocation_top)); mov(top_address, Operand(allocation_top));
if ((flags & RESULT_CONTAINS_TOP) == 0) { if ((flags & RESULT_CONTAINS_TOP) == 0) {
// Load allocation top into result and allocation limit into alloc_limit.. // Load allocation top into result
LoadP(result, MemOperand(top_address)); LoadP(result, MemOperand(top_address));
LoadP(alloc_limit, MemOperand(top_address, kPointerSize));
} else { } else {
if (emit_debug_code()) { if (emit_debug_code()) {
// Assert that result actually contains top on entry. // Assert that result actually contains top on entry.
LoadP(alloc_limit, MemOperand(top_address)); CmpP(result, MemOperand(top_address));
CmpP(result, alloc_limit);
Check(eq, kUnexpectedAllocationTop); Check(eq, kUnexpectedAllocationTop);
} }
// Load allocation limit. Result already contains allocation top.
LoadP(alloc_limit, MemOperand(top_address, limit - top));
} }
if ((flags & DOUBLE_ALIGNMENT) != 0) { if ((flags & DOUBLE_ALIGNMENT) != 0) {
...@@ -1707,7 +1690,7 @@ void MacroAssembler::Allocate(Register object_size, Register result, ...@@ -1707,7 +1690,7 @@ void MacroAssembler::Allocate(Register object_size, Register result,
Label aligned; Label aligned;
beq(&aligned, Label::kNear); beq(&aligned, Label::kNear);
if ((flags & PRETENURE) != 0) { if ((flags & PRETENURE) != 0) {
CmpLogicalP(result, alloc_limit); CmpLogicalP(result, MemOperand(top_address, limit - top));
bge(gc_required); bge(gc_required);
} }
mov(result_end, Operand(isolate()->factory()->one_pointer_filler_map())); mov(result_end, Operand(isolate()->factory()->one_pointer_filler_map()));
...@@ -1720,17 +1703,14 @@ void MacroAssembler::Allocate(Register object_size, Register result, ...@@ -1720,17 +1703,14 @@ void MacroAssembler::Allocate(Register object_size, Register result,
// Calculate new top and bail out if new space is exhausted. Use result // Calculate new top and bail out if new space is exhausted. Use result
// to calculate the new top. Object size may be in words so a shift is // to calculate the new top. Object size may be in words so a shift is
// required to get the number of bytes. // required to get the number of bytes.
SubP(r0, alloc_limit, result);
if ((flags & SIZE_IN_WORDS) != 0) { if ((flags & SIZE_IN_WORDS) != 0) {
ShiftLeftP(result_end, object_size, Operand(kPointerSizeLog2)); ShiftLeftP(result_end, object_size, Operand(kPointerSizeLog2));
CmpP(r0, result_end);
blt(gc_required);
AddP(result_end, result, result_end); AddP(result_end, result, result_end);
} else { } else {
CmpP(r0, object_size);
blt(gc_required);
AddP(result_end, result, object_size); AddP(result_end, result, object_size);
} }
CmpLogicalP(result_end, MemOperand(top_address, limit - top));
bge(gc_required);
// Update allocation top. result temporarily holds the new top. // Update allocation top. result temporarily holds the new top.
if (emit_debug_code()) { if (emit_debug_code()) {
...@@ -1742,8 +1722,13 @@ void MacroAssembler::Allocate(Register object_size, Register result, ...@@ -1742,8 +1722,13 @@ void MacroAssembler::Allocate(Register object_size, Register result,
StoreP(result_end, MemOperand(top_address)); StoreP(result_end, MemOperand(top_address));
} }
// Prefetch the allocation_top's next cache line in advance to
// help alleviate potential cache misses.
// Mode 2 - Prefetch the data into a cache line for store access.
pfd(r2, MemOperand(result, 256));
// Tag object. // Tag object.
AddP(result, result, Operand(kHeapObjectTag)); la(result, MemOperand(result, kHeapObjectTag));
} }
void MacroAssembler::FastAllocate(Register object_size, Register result, void MacroAssembler::FastAllocate(Register object_size, Register result,
...@@ -1795,8 +1780,13 @@ void MacroAssembler::FastAllocate(Register object_size, Register result, ...@@ -1795,8 +1780,13 @@ void MacroAssembler::FastAllocate(Register object_size, Register result,
} }
StoreP(result_end, MemOperand(top_address)); StoreP(result_end, MemOperand(top_address));
// Prefetch the allocation_top's next cache line in advance to
// help alleviate potential cache misses.
// Mode 2 - Prefetch the data into a cache line for store access.
pfd(r2, MemOperand(result, 256));
// Tag object. // Tag object.
AddP(result, result, Operand(kHeapObjectTag)); la(result, MemOperand(result, kHeapObjectTag));
} }
void MacroAssembler::FastAllocate(int object_size, Register result, void MacroAssembler::FastAllocate(int object_size, Register result,
...@@ -1837,14 +1827,24 @@ void MacroAssembler::FastAllocate(int object_size, Register result, ...@@ -1837,14 +1827,24 @@ void MacroAssembler::FastAllocate(int object_size, Register result,
#endif #endif
} }
// Calculate new top using result. if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT) && is_int8(object_size)) {
AddP(result_end, result, Operand(object_size)); // Update allocation top.
AddP(MemOperand(top_address), Operand(object_size));
} else {
// Calculate new top using result.
AddP(result_end, result, Operand(object_size));
// The top pointer is not updated for allocation folding dominators. // Update allocation top.
StoreP(result_end, MemOperand(top_address)); StoreP(result_end, MemOperand(top_address));
}
// Prefetch the allocation_top's next cache line in advance to
// help alleviate potential cache misses.
// Mode 2 - Prefetch the data into a cache line for store access.
pfd(r2, MemOperand(result, 256));
// Tag object. // Tag object.
AddP(result, result, Operand(kHeapObjectTag)); la(result, MemOperand(result, kHeapObjectTag));
} }
void MacroAssembler::CompareObjectType(Register object, Register map, void MacroAssembler::CompareObjectType(Register object, Register map,
......
...@@ -11152,9 +11152,9 @@ EVALUATE(CGH) { ...@@ -11152,9 +11152,9 @@ EVALUATE(CGH) {
} }
EVALUATE(PFD) { EVALUATE(PFD) {
UNIMPLEMENTED(); DCHECK_OPCODE(PFD);
USE(instr); USE(instr);
return 0; return 6;
} }
EVALUATE(STRV) { EVALUATE(STRV) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment