Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8344232: [PPC64] secondary_super_cache does not scale well: C1 and interpreter #22881

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/hotspot/cpu/ppc/assembler_ppc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ class Assembler : public AbstractAssembler {
CLRRWI_OPCODE = RLWINM_OPCODE,
CLRLWI_OPCODE = RLWINM_OPCODE,

RLWNM_OPCODE = (23u << OPCODE_SHIFT),

RLWIMI_OPCODE = (20u << OPCODE_SHIFT),

SLW_OPCODE = (31u << OPCODE_SHIFT | 24u << 1),
Expand Down Expand Up @@ -424,6 +426,9 @@ class Assembler : public AbstractAssembler {
RLDIC_OPCODE = (30u << OPCODE_SHIFT | 2u << XO_27_29_SHIFT), // MD-FORM
RLDIMI_OPCODE = (30u << OPCODE_SHIFT | 3u << XO_27_29_SHIFT), // MD-FORM

RLDCL_OPCODE = (30u << OPCODE_SHIFT | 8u << 1),
RLDCR_OPCODE = (30u << OPCODE_SHIFT | 9u << 1),

SRADI_OPCODE = (31u << OPCODE_SHIFT | 413u << XO_21_29_SHIFT), // XS-FORM

SLD_OPCODE = (31u << OPCODE_SHIFT | 27u << 1), // X-FORM
Expand Down Expand Up @@ -1696,6 +1701,14 @@ class Assembler : public AbstractAssembler {
inline void insrdi( Register a, Register s, int n, int b);
inline void insrwi( Register a, Register s, int n, int b);

// Rotate variable
// Emitters for the rlwnm, rldcl and rldcr instructions. Operand b is a
// register here, not an immediate. mb / me are the mask-begin / mask-end
// fields of the encoding. The variants with a trailing underscore emit the
// same instruction with rc(1) instead of rc(0), i.e. the record form.
inline void rlwnm( Register a, Register s, Register b, int mb, int me);
inline void rlwnm_(Register a, Register s, Register b, int mb, int me);
inline void rldcl( Register a, Register s, Register b, int mb);
inline void rldcl_(Register a, Register s, Register b, int mb);
inline void rldcr( Register a, Register s, Register b, int me);
inline void rldcr_(Register a, Register s, Register b, int me);

// PPC 1, section 3.3.2 Fixed-Point Load Instructions
// 4 bytes
inline void lwzx( Register d, Register s1, Register s2);
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,13 @@ inline void Assembler::rldimi_( Register a, Register s, int sh6, int mb6)
inline void Assembler::insrdi( Register a, Register s, int n, int b) { Assembler::rldimi(a, s, 64-(b+n), b); }
inline void Assembler::insrwi( Register a, Register s, int n, int b) { Assembler::rlwimi(a, s, 32-(b+n), b, b+n-1); }

inline void Assembler::rlwnm( Register a, Register s, Register b, int mb, int me) { emit_int32(RLWNM_OPCODE | rta(a) | rs(s) | rb(b) | mb2125(mb) | me2630(me) | rc(0)); }
inline void Assembler::rlwnm_(Register a, Register s, Register b, int mb, int me) { emit_int32(RLWNM_OPCODE | rta(a) | rs(s) | rb(b) | mb2125(mb) | me2630(me) | rc(1)); }
inline void Assembler::rldcl( Register a, Register s, Register b, int mb) { emit_int32(RLDCL_OPCODE | rta(a) | rs(s) | rb(b) | mb2126(mb) | rc(0)); }
inline void Assembler::rldcl_( Register a, Register s, Register b, int mb) { emit_int32(RLDCL_OPCODE | rta(a) | rs(s) | rb(b) | mb2126(mb) | rc(1)); }
inline void Assembler::rldcr( Register a, Register s, Register b, int me) { emit_int32(RLDCR_OPCODE | rta(a) | rs(s) | rb(b) | me2126(me) | rc(0)); }
inline void Assembler::rldcr_( Register a, Register s, Register b, int me) { emit_int32(RLDCR_OPCODE | rta(a) | rs(s) | rb(b) | me2126(me) | rc(1)); }

// PPC 1, section 3.3.2 Fixed-Point Load Instructions
inline void Assembler::lwzx( Register d, Register s1, Register s2) { emit_int32(LWZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
inline void Assembler::lwz( Register d, Address &a) {
Expand Down
7 changes: 3 additions & 4 deletions src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,10 +603,9 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
{ // Support for uint StubRoutine::partial_subtype_check( Klass sub, Klass super );
const Register sub_klass = R5,
super_klass = R4,
temp1_reg = R6,
temp2_reg = R0;
__ check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg); // returns with CR0.eq if successful
__ crandc(CCR0, Assembler::equal, CCR0, Assembler::equal); // failed: CR0.ne
temp1_reg = R6;
__ check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, noreg); // may return with CR0.eq if successful
// Otherwise, result is in CR0.
__ blr();
}
break;
Expand Down
229 changes: 201 additions & 28 deletions src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2107,12 +2107,12 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
#undef FINAL_JUMP
}

void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp1_reg,
Register temp2_reg,
Label* L_success,
Register result_reg) {
void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass,
Register super_klass,
Register temp1_reg,
Register temp2_reg,
Label* L_success,
Register result_reg) {
const Register array_ptr = temp1_reg; // current value from cache array
const Register temp = temp2_reg;

Expand Down Expand Up @@ -2154,6 +2154,96 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
else if (result_reg == noreg) { blr(); } // return with CR0.eq if neither label nor result reg provided

bind(fallthru);
if (L_success != nullptr && result_reg == noreg) {
crandc(CCR0, Assembler::equal, CCR0, Assembler::equal); // failed: CR0.ne
}
}

// Returns r unchanged when it is a valid register. Otherwise takes the next
// register from available_regs (advancing the iterator), records it in
// regs_to_push so the caller can spill/restore it, and returns that register.
Register MacroAssembler::allocate_if_noreg(Register r,
                                           RegSetIterator<Register> &available_regs,
                                           RegSet &regs_to_push) {
  if (r->is_valid()) {
    return r;  // caller supplied a register, nothing to allocate
  }

  const Register fresh = *available_regs++;
  regs_to_push += fresh;
  return fresh;
}

// Stores every register in 'set' to consecutive word-sized slots below R1_SP
// (-wordSize, -2*wordSize, ...), in iteration order of the set.
// R1_SP itself is not modified.
// NOTE(review): this relies on the memory just below the stack pointer being
// safe to use (presumably the ABI's protected zone) - confirm for all callers.
void MacroAssembler::push_set(RegSet set) {
  int offset = -wordSize;
  for (RegSetIterator<Register> cursor = set.begin(); *cursor != noreg; ++cursor, offset -= wordSize) {
    std(*cursor, offset, R1_SP);
  }
}

// Reloads every register in 'set' from consecutive word-sized slots below
// R1_SP (-wordSize, -2*wordSize, ...), in iteration order of the set.
// This is the same slot layout push_set() writes, so it must be called with
// the same set. R1_SP itself is not modified.
void MacroAssembler::pop_set(RegSet set) {
  int offset = -wordSize;
  for (RegSetIterator<Register> cursor = set.begin(); *cursor != noreg; ++cursor, offset -= wordSize) {
    ld(*cursor, offset, R1_SP);
  }
}

// Slow-path subtype check based on the hashed secondary supers table.
// Emits code that looks up super_klass among the secondary supers of
// sub_klass by delegating to lookup_secondary_supers_table_var().
//
// Registers:
//   sub_klass, super_klass - inputs.
//   temp1_reg, temp2_reg   - scratch registers; either may be noreg.
//   result_reg             - receives the lookup result (0 means success,
//                            nonzero means failure); may be noreg.
//   R0                     - must differ from all of the above (asserted).
//
// Every register passed as noreg, plus two further internal temps, is taken
// from R2..R12, spilled with push_set() before the lookup and restored with
// pop_set() afterwards.
//
// Result reporting:
//   - L_success != nullptr: branches to *L_success on success, falls through
//     on failure (CR0 holds the comparison of the result against 0).
//   - L_success == nullptr and result_reg == noreg: the outcome is left in
//     CR0 only (CR0.eq on success).
//   - L_success == nullptr and result_reg provided: the outcome is left in
//     result_reg only; CR0 is not set by this function.
void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass,
                                                         Register super_klass,
                                                         Register temp1_reg,
                                                         Register temp2_reg,
                                                         Label* L_success,
                                                         Register result_reg) {
  RegSet temps = RegSet::of(temp1_reg, temp2_reg);

  assert_different_registers(sub_klass, super_klass, temp1_reg, temp2_reg, result_reg, R0);

  Register temp3_reg = noreg, temp4_reg = noreg;
  bool result_reg_provided = (result_reg != noreg); // otherwise, result will be in CR0

  BLOCK_COMMENT("check_klass_subtype_slow_path_table");

  // Registers we may borrow. A caller-supplied result_reg must be excluded as
  // well: otherwise it could be handed out again as one of the temps, which
  // would alias it inside the lookup and let pop_set() overwrite the result.
  RegSetIterator<Register> available_regs
    = (RegSet::range(R2, R12) - temps - sub_klass - super_klass - result_reg).begin();

  RegSet pushed_regs;

  temp1_reg = allocate_if_noreg(temp1_reg, available_regs, pushed_regs);
  temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs);
  temp3_reg = allocate_if_noreg(temp3_reg, available_regs, pushed_regs);
  temp4_reg = allocate_if_noreg(temp4_reg, available_regs, pushed_regs);
  result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs);

  // Spill the borrowed registers.
  push_set(pushed_regs);

  lookup_secondary_supers_table_var(sub_klass, super_klass,
                                    temp1_reg, temp2_reg, temp3_reg, temp4_reg,
                                    result_reg);

  if (L_success != nullptr || !result_reg_provided) {
    // result_reg may get overwritten by pop_set, so capture the outcome in
    // CR0 first. The loads emitted by pop_set do not modify CR0.
    cmpdi(CCR0, result_reg, 0);
  }

  // Unspill the temp. registers:
  pop_set(pushed_regs);

  if (L_success != nullptr) {
    beq(CCR0, *L_success);
  }
}

// Slow-path subtype check dispatcher.
// With UseSecondarySupersTable set, emits the hashed-table lookup; otherwise
// emits the linear scan. The linear variant needs a real second temp, so a
// missing temp2_reg (noreg) is replaced by R0 in that case only.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp1_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Register result_reg) {
  if (!UseSecondarySupersTable) {
    const Register scratch = (temp2_reg == noreg) ? R0 : temp2_reg;
    check_klass_subtype_slow_path_linear(sub_klass, super_klass, temp1_reg, scratch, L_success, result_reg);
    return;
  }

  check_klass_subtype_slow_path_table(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, result_reg);
}

// Try fast path, then go to slow one if not successful
Expand Down Expand Up @@ -2208,27 +2298,27 @@ do { \
(result == R8_ARG6 || result == noreg), "registers must match ppc64.ad"); \
} while(0)

void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
Register temp4,
Register result,
u1 super_klass_slot) {
void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
Register temp4,
Register result,
u1 super_klass_slot) {
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result);

Label L_done;

BLOCK_COMMENT("lookup_secondary_supers_table {");
BLOCK_COMMENT("lookup_secondary_supers_table_const {");

const Register
r_array_base = temp1,
r_array_length = temp2,
r_array_index = temp3,
r_bitmap = temp4;

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; // Required for stub call below.

ld(r_bitmap, in_bytes(Klass::secondary_supers_bitmap_offset()), r_sub_klass);

Expand Down Expand Up @@ -2290,7 +2380,90 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
bctrl();

bind(L_done);
BLOCK_COMMENT("} lookup_secondary_supers_table");
BLOCK_COMMENT("} lookup_secondary_supers_table_const");

if (VerifySecondarySupers) {
verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
temp1, temp2, temp3);
}
}

// At runtime, return 0 in result if r_super_klass is a superclass of
// r_sub_klass, otherwise return nonzero. Use this version of
// lookup_secondary_supers_table() if you don't know ahead of time
// which superclass will be searched for. Used by interpreter and
// runtime stubs. It is larger and has somewhat greater latency than
// the version above, which takes a constant super_klass_slot.
void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass,
Register r_super_klass,
Register temp1,
Register temp2,
Register temp3,
Register temp4,
Register result) {
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result, R0);

Label L_done;

BLOCK_COMMENT("lookup_secondary_supers_table_var {");

const Register
r_array_base = temp1,
slot = temp2,
r_array_index = temp3,
r_bitmap = temp4;

lbz(slot, in_bytes(Klass::hash_slot_offset()), r_super_klass);
ld(r_bitmap, in_bytes(Klass::secondary_supers_bitmap_offset()), r_sub_klass);

li(result, 1); // Make sure that result is nonzero if the test below misses.

// First check the bitmap to see if super_klass might be present. If
// the bit is zero, we are certain that super_klass is not one of
// the secondary supers.
xori(R0, slot, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1); // slot ^ 63 === 63 - slot (mod 64)
sld_(r_array_index, r_bitmap, R0); // shift left by 63-slot

// We test the MSB of r_array_index, i.e. its sign bit
bge(CCR0, L_done);

// We will consult the secondary-super array.
ld(r_array_base, in_bytes(Klass::secondary_supers_offset()), r_sub_klass);

// The value i in r_array_index is >= 1, so even though r_array_base
// points to the length, we don't need to adjust it to point to the data.
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");

// Get the first array index that can contain super_klass into r_array_index.
popcntd(r_array_index, r_array_index);

// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
sldi(r_array_index, r_array_index, LogBytesPerWord); // scale

ldx(R0, r_array_base, r_array_index);
xor_(result, R0, r_super_klass);
beq(CCR0, L_done); // found a match, result is 0 in this case

// Linear probe. Rotate the bitmap so that the next bit to test is
// in Bit 1.
neg(R0, slot); // rotate right
rldcl(r_bitmap, r_bitmap, R0, 0);
Register temp = slot;
andi_(temp, r_bitmap, 2);
beq(CCR0, L_done); // fail (result != 0)

// The slot we just inspected is at secondary_supers[r_array_index - 1].
// The next slot to be inspected, by the logic we're about to call,
// is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
// have been checked.
lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index,
r_bitmap, result, temp);
// return whatever we got from slow path

bind(L_done);

BLOCK_COMMENT("} lookup_secondary_supers_table_var");

if (VerifySecondarySupers) {
verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
Expand All @@ -2313,8 +2486,6 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
r_array_length = temp1,
r_sub_klass = noreg;

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

Label L_done;

// Load the array length.
Expand Down Expand Up @@ -2405,8 +2576,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
r_array_index = temp3,
r_bitmap = noreg; // unused

LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

BLOCK_COMMENT("verify_secondary_supers_table {");

Label passed, failure;
Expand All @@ -2433,13 +2602,17 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
cmpd(CCR0, result, linear_result);
beq(CCR0, passed);

assert_different_registers(R3_ARG1, r_sub_klass, linear_result, result);
mr_if_needed(R3_ARG1, r_super_klass);
assert_different_registers(R4_ARG2, linear_result, result);
mr_if_needed(R4_ARG2, r_sub_klass);
assert_different_registers(R5_ARG3, result);
neg(R5_ARG3, linear_result);
neg(R6_ARG4, result);
// report fatal error and terminate VM

// Argument shuffle. Using stack to avoid clashes.
std(r_super_klass, -8, R1_SP);
std(r_sub_klass, -16, R1_SP);
std(linear_result, -24, R1_SP);
mr_if_needed(R6_ARG4, result);
ld(R3_ARG1, -8, R1_SP);
ld(R4_ARG2, -16, R1_SP);
ld(R5_ARG3, -24, R1_SP);

const char* msg = "mismatch";
load_const_optimized(R7_ARG5, (intptr_t)msg, R0);
call_VM_leaf(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure));
Expand Down
Loading