Commit 95ccc97f authored by Ivica Bogosavljevic, committed by Commit Bot

MIPS: Rewrite byteswap implementation

Byteswap operations are used heavily in WASM on big-endian
architectures. This CL removes the unused 1-byte swapping,
rewrites the tests so they have better coverage, and cleans
up some other minor things.

TEST=cctest/test-run-wasm/RunWasmTurbofan_I32DivSOnDifferentRegisters

Change-Id: I60466bbd5fe3f64e8e55684265dc43e92fcabc2c
Reviewed-on: https://chromium-review.googlesource.com/1106379
Commit-Queue: Ivica Bogosavljevic <ivica.bogosavljevic@mips.com>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Sreten Kovacevic <sreten.kovacevic@mips.com>
Cr-Commit-Position: refs/heads/master@{#53908}
parent d7e6fbe5
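For context: a byte swap reverses the byte order of a value, which is what
WASM's little-endian memory semantics require on big-endian MIPS. A minimal
C++ sketch of the 32-bit operation the sequences below implement (purely
illustrative, not code from this CL):

#include <cstdint>

// Portable reference for a 32-bit byte swap.
static inline uint32_t ReferenceByteSwap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
         ((v << 8) & 0x00FF0000u) | (v << 24);
}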
@@ -1827,8 +1827,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kMips64ByteSwap32: {
__ ByteSwapUnsigned(i.OutputRegister(0), i.InputRegister(0), 4);
__ dsrl32(i.OutputRegister(0), i.OutputRegister(0), 0);
__ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4);
break;
}
case kWord32AtomicLoadInt8:
......
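The old sequence swapped through ByteSwapUnsigned and then shifted the result
down with dsrl32; the new one is a single ByteSwapSigned with a 4-byte operand,
which leaves the swapped word sign-extended, the canonical form for 32-bit
values held in MIPS64 registers. A hedged sketch of the intended value (using
GCC/Clang's __builtin_bswap32, not the emitted assembly):

#include <cstdint>

// Swap the 32-bit input and keep it sign-extended in a 64-bit register.
static inline int64_t ByteSwap32SignExtended(uint32_t input) {
  return static_cast<int32_t>(__builtin_bswap32(input));
}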
@@ -1125,15 +1125,7 @@ int TruncUlDLatency() {
int PushLatency() { return DadduLatency() + AlignedMemoryLatency(); }
int ByteSwapSignedLatency() {
// Estimated max.
return 4;
}
int ByteSwapUnsignedLatency() {
// Estimated max.
return 4;
}
int ByteSwapSignedLatency() { return 2; }
int LlLatency(int offset) {
bool is_one_instruction =
@@ -1648,7 +1640,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kMips64ByteSwap64:
return ByteSwapSignedLatency();
case kMips64ByteSwap32:
return ByteSwapUnsignedLatency() + 1;
return ByteSwapSignedLatency();
case kWord32AtomicLoadInt8:
case kWord32AtomicLoadUint8:
case kWord32AtomicLoadInt16:
......
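With ByteSwap32 now emitted as the same two-instruction sequence as the signed
variant, the scheduler's estimate drops from ByteSwapUnsignedLatency() + 1 =
4 + 1 = 5 down to ByteSwapSignedLatency() = 2, and the separate unsigned
latency helper can be removed.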
@@ -973,71 +973,66 @@ void TurboAssembler::Bnvc(Register rs, Register rt, Label* L) {
// Word Swap Byte
void TurboAssembler::ByteSwapSigned(Register dest, Register src,
int operand_size) {
DCHECK(operand_size == 1 || operand_size == 2 || operand_size == 4);
Register input = src;
if (operand_size == 2) {
input = dest;
Seh(dest, src);
} else if (operand_size == 1) {
input = dest;
Seb(dest, src);
}
// No need to do any preparation if operand_size is 4
DCHECK(operand_size == 2 || operand_size == 4);
if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
wsbh(dest, input);
rotr(dest, dest, 16);
if (operand_size == 2) {
wsbh(dest, src);
seh(dest, dest);
} else {
wsbh(dest, src);
rotr(dest, dest, 16);
}
} else if (IsMipsArchVariant(kMips32r1) || IsMipsArchVariant(kLoongson)) {
Register tmp = at;
Register tmp2 = t8;
DCHECK(dest != tmp && dest != tmp2);
DCHECK(src != tmp && src != tmp2);
if (operand_size == 2) {
DCHECK(src != at && dest != at);
srl(at, src, 8);
andi(at, at, 0xFF);
sll(dest, src, 8);
or_(dest, dest, at);
// Sign-extension
sll(dest, dest, 16);
sra(dest, dest, 16);
} else {
Register tmp = at;
Register tmp2 = t8;
DCHECK(dest != tmp && dest != tmp2);
DCHECK(src != tmp && src != tmp2);
andi(tmp2, input, 0xFF);
sll(tmp, tmp2, 24);
andi(tmp2, src, 0xFF);
sll(tmp, tmp2, 24);
andi(tmp2, input, 0xFF00);
sll(tmp2, tmp2, 8);
or_(tmp, tmp, tmp2);
andi(tmp2, src, 0xFF00);
sll(tmp2, tmp2, 8);
or_(tmp, tmp, tmp2);
srl(tmp2, input, 8);
andi(tmp2, tmp2, 0xFF00);
or_(tmp, tmp, tmp2);
srl(tmp2, src, 8);
andi(tmp2, tmp2, 0xFF00);
or_(tmp, tmp, tmp2);
srl(tmp2, input, 24);
or_(dest, tmp, tmp2);
srl(tmp2, src, 24);
or_(dest, tmp, tmp2);
}
}
}
void TurboAssembler::ByteSwapUnsigned(Register dest, Register src,
int operand_size) {
DCHECK(operand_size == 1 || operand_size == 2);
DCHECK_EQ(operand_size, 2);
if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
Register input = src;
if (operand_size == 1) {
input = dest;
andi(dest, src, 0xFF);
} else {
input = dest;
andi(dest, src, 0xFFFF);
}
// No need to do any preparation if operand_size is 4
wsbh(dest, input);
rotr(dest, dest, 16);
wsbh(dest, src);
andi(dest, dest, 0xFFFF);
} else if (IsMipsArchVariant(kMips32r1) || IsMipsArchVariant(kLoongson)) {
if (operand_size == 1) {
sll(dest, src, 24);
} else {
Register tmp = at;
andi(tmp, src, 0xFF00);
sll(dest, src, 24);
sll(tmp, tmp, 8);
or_(dest, tmp, dest);
}
DCHECK(src != at && dest != at);
srl(at, src, 8);
andi(at, at, 0xFF);
sll(dest, src, 8);
or_(dest, dest, at);
// Zero-extension
andi(dest, dest, 0xFFFF);
}
}
......
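On the pre-r2 fallback path (kMips32r1/kLoongson), the new 2-byte signed case
builds the swap from shifts and masks and then sign-extends explicitly. Roughly
equivalent C for the intended result (a sketch, not the generated code):

#include <cstdint>

// Swap the two low bytes of src, then sign-extend the 16-bit result,
// mirroring the srl/andi/sll/or_ + sll/sra sequence above.
static inline int32_t ByteSwap16Signed(uint32_t src) {
  uint32_t high_byte = (src >> 8) & 0xFF;     // srl + andi: old high byte
  uint32_t swapped = (src << 8) | high_byte;  // sll + or_: bytes exchanged
  return static_cast<int16_t>(swapped);       // sll/sra by 16: sign-extension
}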
@@ -1117,23 +1117,14 @@ void TurboAssembler::Bnvc(Register rs, Register rt, Label* L) {
// Change endianness
void TurboAssembler::ByteSwapSigned(Register dest, Register src,
int operand_size) {
DCHECK(operand_size == 1 || operand_size == 2 || operand_size == 4 ||
operand_size == 8);
DCHECK(operand_size == 2 || operand_size == 4 || operand_size == 8);
DCHECK(kArchVariant == kMips64r6 || kArchVariant == kMips64r2);
if (operand_size == 1) {
seb(src, src);
sll(src, src, 0);
dsbh(dest, src);
dshd(dest, dest);
} else if (operand_size == 2) {
seh(src, src);
sll(src, src, 0);
dsbh(dest, src);
dshd(dest, dest);
if (operand_size == 2) {
wsbh(dest, src);
seh(dest, dest);
} else if (operand_size == 4) {
sll(src, src, 0);
dsbh(dest, src);
dshd(dest, dest);
wsbh(dest, src);
rotr(dest, dest, 16);
} else {
dsbh(dest, src);
dshd(dest, dest);
@@ -1142,20 +1133,14 @@ void TurboAssembler::ByteSwapSigned(Register dest, Register src,
void TurboAssembler::ByteSwapUnsigned(Register dest, Register src,
int operand_size) {
DCHECK(operand_size == 1 || operand_size == 2 || operand_size == 4);
if (operand_size == 1) {
andi(src, src, 0xFF);
dsbh(dest, src);
dshd(dest, dest);
} else if (operand_size == 2) {
andi(src, src, 0xFFFF);
dsbh(dest, src);
dshd(dest, dest);
} else {
dsll32(src, src, 0);
dsrl32(src, src, 0);
dsbh(dest, src);
dshd(dest, dest);
DCHECK(operand_size == 2 || operand_size == 4);
if (operand_size == 2) {
wsbh(dest, src);
andi(dest, dest, 0xFFFF);
} else {
wsbh(dest, src);
rotr(dest, dest, 16);
dinsu_(dest, zero_reg, 32, 32);
}
}
......
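On MIPS64, wsbh and rotr are 32-bit operations that leave a sign-extended
result, so the unsigned 4-byte case additionally clears bits 32..63 with
dinsu_. A hedged sketch of the value the sequence is meant to produce (using
GCC/Clang's __builtin_bswap32):

#include <cstdint>

// Byte-swap the low word of src and zero-extend it to 64 bits.
static inline uint64_t ByteSwap32ZeroExtended(uint64_t src) {
  return static_cast<uint64_t>(
      __builtin_bswap32(static_cast<uint32_t>(src)));
}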
@@ -1664,13 +1664,13 @@ static inline V ByteReverse(V value) {
switch (size_of_v) {
case 2:
#if V8_HAS_BUILTIN_BSWAP16
return __builtin_bswap16(value);
return static_cast<V>(__builtin_bswap16(static_cast<uint16_t>(value)));
#else
return value << 8 | (value >> 8 & 0x00FF);
#endif
case 4:
#if V8_HAS_BUILTIN_BSWAP32
return __builtin_bswap32(value);
return static_cast<V>(__builtin_bswap32(static_cast<uint32_t>(value)));
#else
{
size_t bits_of_v = size_of_v * kBitsPerByte;
@@ -1682,7 +1682,7 @@ static inline V ByteReverse(V value) {
#endif
case 8:
#if V8_HAS_BUILTIN_BSWAP64
return __builtin_bswap64(value);
return static_cast<V>(__builtin_bswap64(static_cast<uint64_t>(value)));
#else
{
size_t bits_of_v = size_of_v * kBitsPerByte;
......
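The added casts matter because __builtin_bswap16/32/64 take and return
fixed-width unsigned types, so a signed V is converted explicitly on the way
in and narrowed back to V on the way out, keeping the conversions deliberate.
For example (assuming the GCC/Clang builtins that V8_HAS_BUILTIN_BSWAP16
selects):

#include <cstdint>

// Returns a value of type int16_t instead of the builtin's uint16_t result.
static inline int16_t SwapSigned16(int16_t value) {
  return static_cast<int16_t>(
      __builtin_bswap16(static_cast<uint16_t>(value)));
}
// SwapSigned16(static_cast<int16_t>(0xFCDE)) == static_cast<int16_t>(0xDEFC)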
@@ -50,65 +50,37 @@ using F4 = Object*(void* p0, void* p1, int p2, int p3, int p4);
TEST(BYTESWAP) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope handles(isolate);
HandleScope scope(isolate);
struct T {
int32_t r1;
int32_t r2;
int32_t r3;
int32_t r4;
int32_t r5;
int32_t t1;
int32_t t2;
int32_t t3;
int32_t t4;
int32_t t5;
int32_t p1;
int32_t p2;
int32_t p3;
int32_t p4;
int32_t p5;
uint32_t s4;
uint32_t s2;
uint32_t u2;
};
T t;
uint32_t test_values[] = {0x5612FFCD, 0x9D327ACC, 0x781A15C3, 0xFCDE, 0x9F,
0xC81A15C3, 0x80000000, 0xFFFFFFFF, 0x00008000};
MacroAssembler assembler(isolate, nullptr, 0,
v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ lw(a2, MemOperand(a0, offsetof(T, r1)));
__ ByteSwapSigned(a1, a2, 4);
__ sw(a2, MemOperand(a0, offsetof(T, p1)));
__ sw(a1, MemOperand(a0, offsetof(T, t1)));
__ ByteSwapSigned(a2, a2, 4);
__ sw(a2, MemOperand(a0, offsetof(T, r1)));
__ lw(a2, MemOperand(a0, offsetof(T, r2)));
__ ByteSwapSigned(a1, a2, 2);
__ sw(a2, MemOperand(a0, offsetof(T, p2)));
__ sw(a1, MemOperand(a0, offsetof(T, t2)));
__ ByteSwapSigned(a2, a2, 2);
__ sw(a2, MemOperand(a0, offsetof(T, r2)));
__ lw(a2, MemOperand(a0, offsetof(T, r3)));
__ ByteSwapSigned(a1, a2, 1);
__ sw(a2, MemOperand(a0, offsetof(T, p3)));
__ sw(a1, MemOperand(a0, offsetof(T, t3)));
__ ByteSwapSigned(a2, a2, 1);
__ sw(a2, MemOperand(a0, offsetof(T, r3)));
__ lw(a2, MemOperand(a0, offsetof(T, r4)));
__ ByteSwapUnsigned(a1, a2, 1);
__ sw(a2, MemOperand(a0, offsetof(T, p4)));
__ sw(a1, MemOperand(a0, offsetof(T, t4)));
__ ByteSwapUnsigned(a2, a2, 1);
__ sw(a2, MemOperand(a0, offsetof(T, r4)));
__ lw(a2, MemOperand(a0, offsetof(T, r5)));
__ ByteSwapUnsigned(a1, a2, 2);
__ sw(a2, MemOperand(a0, offsetof(T, p5)));
__ sw(a1, MemOperand(a0, offsetof(T, t5)));
__ ByteSwapUnsigned(a2, a2, 2);
__ sw(a2, MemOperand(a0, offsetof(T, r5)));
__ lw(a1, MemOperand(a0, offsetof(T, s4)));
__ nop();
__ ByteSwapSigned(a1, a1, 4);
__ sw(a1, MemOperand(a0, offsetof(T, s4)));
__ lw(a1, MemOperand(a0, offsetof(T, s2)));
__ nop();
__ ByteSwapSigned(a1, a1, 2);
__ sw(a1, MemOperand(a0, offsetof(T, s2)));
__ lw(a1, MemOperand(a0, offsetof(T, u2)));
__ nop();
__ ByteSwapUnsigned(a1, a1, 2);
__ sw(a1, MemOperand(a0, offsetof(T, u2)));
__ jr(ra);
__ nop();
@@ -118,30 +90,21 @@ TEST(BYTESWAP) {
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
auto f = GeneratedCode<F3>::FromCode(*code);
t.r1 = 0x781A15C3;
t.r2 = 0x2CDE;
t.r3 = 0x9F;
t.r4 = 0x9F;
t.r5 = 0x2CDE;
f.Call(&t, 0, 0, 0, 0);
CHECK_EQ(static_cast<int32_t>(0xC3151A78), t.r1);
CHECK_EQ(static_cast<int32_t>(0xDE2C0000), t.r2);
CHECK_EQ(static_cast<int32_t>(0x9FFFFFFF), t.r3);
CHECK_EQ(static_cast<int32_t>(0x9F000000), t.r4);
CHECK_EQ(static_cast<int32_t>(0xDE2C0000), t.r5);
CHECK_EQ(t.t1, t.r1);
CHECK_EQ(t.t2, t.r2);
CHECK_EQ(t.t3, t.r3);
CHECK_EQ(t.t4, t.r4);
CHECK_EQ(t.t5, t.r5);
CHECK_EQ(static_cast<int32_t>(0x781A15C3), t.p1);
CHECK_EQ(static_cast<int32_t>(0x2CDE), t.p2);
CHECK_EQ(static_cast<int32_t>(0x9F), t.p3);
CHECK_EQ(static_cast<int32_t>(0x9F), t.p4);
CHECK_EQ(static_cast<int32_t>(0x2CDE), t.p5);
for (size_t i = 0; i < arraysize(test_values); i++) {
int16_t in_s2 = static_cast<int16_t>(test_values[i]);
uint16_t in_u2 = static_cast<uint16_t>(test_values[i]);
t.s4 = test_values[i];
t.s2 = static_cast<uint64_t>(in_s2);
t.u2 = static_cast<uint64_t>(in_u2);
f.Call(&t, 0, 0, 0, 0);
CHECK_EQ(ByteReverse(test_values[i]), t.s4);
CHECK_EQ(ByteReverse<int16_t>(in_s2), static_cast<int16_t>(t.s2));
CHECK_EQ(ByteReverse<uint16_t>(in_u2), static_cast<uint16_t>(t.u2));
}
}
static void TestNaN(const char *code) {
......
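What the new loop exercises is sign- versus zero-extension of the swapped
halfword: the ByteSwapSigned path should leave it sign-extended in the
destination register, the ByteSwapUnsigned path zero-extended, and the checks
narrow back to 16 bits for comparison against ByteReverse. In portable terms
(a sketch under that assumption; helper names are illustrative):

#include <cstdint>

static inline uint16_t Swap16(uint16_t x) {
  return static_cast<uint16_t>((x >> 8) | (x << 8));
}
// Signed path: the swapped halfword is sign-extended into the register.
static inline uint32_t ExpectedSigned16(uint16_t x) {
  return static_cast<uint32_t>(
      static_cast<int32_t>(static_cast<int16_t>(Swap16(x))));
}
// Unsigned path: the swapped halfword is zero-extended.
static inline uint32_t ExpectedUnsigned16(uint16_t x) { return Swap16(x); }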
@@ -55,55 +55,53 @@ TEST(BYTESWAP) {
HandleScope scope(isolate);
struct T {
int64_t r1;
int64_t r2;
int64_t r3;
int64_t r4;
int64_t r5;
int64_t r6;
int64_t r7;
uint64_t s8;
uint64_t s4;
uint64_t s2;
uint64_t u4;
uint64_t u2;
};
T t;
uint64_t test_values[] = {0x5612FFCD9D327ACC,
0x781A15C3,
0xFCDE,
0x9F,
0xC81A15C3,
0x8000000000000000,
0xFFFFFFFFFFFFFFFF,
0x0000000080000000,
0x0000000000008000};
MacroAssembler assembler(isolate, nullptr, 0,
v8::internal::CodeObjectRequired::kYes);
MacroAssembler* masm = &assembler;
__ Ld(a4, MemOperand(a0, offsetof(T, r1)));
__ Ld(a4, MemOperand(a0, offsetof(T, s8)));
__ nop();
__ ByteSwapSigned(a4, a4, 8);
__ Sd(a4, MemOperand(a0, offsetof(T, r1)));
__ Sd(a4, MemOperand(a0, offsetof(T, s8)));
__ Ld(a4, MemOperand(a0, offsetof(T, r2)));
__ Ld(a4, MemOperand(a0, offsetof(T, s4)));
__ nop();
__ ByteSwapSigned(a4, a4, 4);
__ Sd(a4, MemOperand(a0, offsetof(T, r2)));
__ Sd(a4, MemOperand(a0, offsetof(T, s4)));
__ Ld(a4, MemOperand(a0, offsetof(T, r3)));
__ Ld(a4, MemOperand(a0, offsetof(T, s2)));
__ nop();
__ ByteSwapSigned(a4, a4, 2);
__ Sd(a4, MemOperand(a0, offsetof(T, r3)));
__ Ld(a4, MemOperand(a0, offsetof(T, r4)));
__ nop();
__ ByteSwapSigned(a4, a4, 1);
__ Sd(a4, MemOperand(a0, offsetof(T, r4)));
__ Sd(a4, MemOperand(a0, offsetof(T, s2)));
__ Ld(a4, MemOperand(a0, offsetof(T, r5)));
__ Ld(a4, MemOperand(a0, offsetof(T, u4)));
__ nop();
__ ByteSwapUnsigned(a4, a4, 1);
__ Sd(a4, MemOperand(a0, offsetof(T, r5)));
__ ByteSwapUnsigned(a4, a4, 4);
__ Sd(a4, MemOperand(a0, offsetof(T, u4)));
__ Ld(a4, MemOperand(a0, offsetof(T, r6)));
__ Ld(a4, MemOperand(a0, offsetof(T, u2)));
__ nop();
__ ByteSwapUnsigned(a4, a4, 2);
__ Sd(a4, MemOperand(a0, offsetof(T, r6)));
__ Ld(a4, MemOperand(a0, offsetof(T, r7)));
__ nop();
__ ByteSwapUnsigned(a4, a4, 4);
__ Sd(a4, MemOperand(a0, offsetof(T, r7)));
__ Sd(a4, MemOperand(a0, offsetof(T, u2)));
__ jr(ra);
__ nop();
@@ -113,22 +111,27 @@ TEST(BYTESWAP) {
Handle<Code> code =
isolate->factory()->NewCode(desc, Code::STUB, Handle<Code>());
auto f = GeneratedCode<F3>::FromCode(*code);
t.r1 = 0x5612FFCD9D327ACC;
t.r2 = 0x781A15C3;
t.r3 = 0xFCDE;
t.r4 = 0x9F;
t.r5 = 0x9F;
t.r6 = 0xFCDE;
t.r7 = 0xC81A15C3;
f.Call(&t, 0, 0, 0, 0);
CHECK_EQ(static_cast<int64_t>(0xCC7A329DCDFF1256), t.r1);
CHECK_EQ(static_cast<int64_t>(0xC3151A7800000000), t.r2);
CHECK_EQ(static_cast<int64_t>(0xDEFCFFFFFFFFFFFF), t.r3);
CHECK_EQ(static_cast<int64_t>(0x9FFFFFFFFFFFFFFF), t.r4);
CHECK_EQ(static_cast<int64_t>(0x9F00000000000000), t.r5);
CHECK_EQ(static_cast<int64_t>(0xDEFC000000000000), t.r6);
CHECK_EQ(static_cast<int64_t>(0xC3151AC800000000), t.r7);
for (size_t i = 0; i < arraysize(test_values); i++) {
int32_t in_s4 = static_cast<int32_t>(test_values[i]);
int16_t in_s2 = static_cast<int16_t>(test_values[i]);
uint32_t in_u4 = static_cast<uint32_t>(test_values[i]);
uint16_t in_u2 = static_cast<uint16_t>(test_values[i]);
t.s8 = test_values[i];
t.s4 = static_cast<uint64_t>(in_s4);
t.s2 = static_cast<uint64_t>(in_s2);
t.u4 = static_cast<uint64_t>(in_u4);
t.u2 = static_cast<uint64_t>(in_u2);
f.Call(&t, 0, 0, 0, 0);
CHECK_EQ(ByteReverse<uint64_t>(test_values[i]), t.s8);
CHECK_EQ(ByteReverse<int32_t>(in_s4), static_cast<int32_t>(t.s4));
CHECK_EQ(ByteReverse<int16_t>(in_s2), static_cast<int16_t>(t.s2));
CHECK_EQ(ByteReverse<uint32_t>(in_u4), static_cast<uint32_t>(t.u4));
CHECK_EQ(ByteReverse<uint16_t>(in_u2), static_cast<uint16_t>(t.u2));
}
}
TEST(LoadConstants) {
......
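The mips64 test follows the same pattern with 64-, 32- and 16-bit inputs,
using the ByteReverse template from utils.h as the portable oracle. A usage
sketch for the 64-bit case (hedged; assumes the __builtin_bswap64 path that
V8_HAS_BUILTIN_BSWAP64 selects):

#include <cstdint>

// e.g. 0x5612FFCD9D327ACC swaps to 0xCC7A329DCDFF1256, which is what the
// generated stub has to write back into t.s8.
static inline uint64_t Oracle64(uint64_t v) { return __builtin_bswap64(v); }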