< prev index next >

src/cpu/ppc/vm/ppc.ad

Print this page
rev 12310 : Reserve R30 as a register that always holds zero in C1 and C2 code

In several places a 0 is loaded into a register as a temporary value. This can
be improved by keeping a 0 cached in a dedicated register.

I did not notice a performance drop when applying only this patch, which
suggests that more registers are available than are normally needed, so this
caching technique can be applied.

Besides making R30_zero a dedicated register that is initialized with 0 for the
C1 and C2 code, new rules for storing 0 with stb, sth, stw and std were added.

*** 298,308 **** R25, R25_H, R26, R26_H, R27, R27_H, R28, R28_H, R29, R29_H, - R30, R30_H, R31, R31_H, // scratch/special registers R11, R11_H, R12, R12_H, --- 298,307 ----
*** 316,325 **** --- 315,325 ---- R5, R5_H, R4, R4_H, R3, R3_H, // special registers, not available for allocation + R30, R30_H, // R30_zero R16, R16_H, // R16_thread R13, R13_H, // system thread id R2, R2_H, // may be used for TOC R1, R1_H, // SP R0, R0_H // R0 (scratch)
*** 446,456 **** R25, R26, R27, R28, /*R29,*/ // global TOC ! R30, R31 ); // 32 bit registers that can only be read i.e. these registers can // only be src of all instructions. --- 446,456 ---- R25, R26, R27, R28, /*R29,*/ // global TOC ! /*R30,*/ // R30_zero R31 ); // 32 bit registers that can only be read i.e. these registers can // only be src of all instructions.
*** 483,493 **** R25, R26, R27, R28, /*R29,*/ ! R30, R31 ); reg_class rscratch1_bits32_reg(R11); reg_class rscratch2_bits32_reg(R12); --- 483,493 ---- R25, R26, R27, R28, /*R29,*/ ! /*R30,*/ // R30_zero R31 ); reg_class rscratch1_bits32_reg(R11); reg_class rscratch2_bits32_reg(R12);
*** 543,553 **** R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! R30_H, R30, R31_H, R31 ); // 64 bit registers used excluding r2, r11 and r12 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses --- 543,553 ---- R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! /*R30_H, R30,*/ // R30_zero R31_H, R31 ); // 64 bit registers used excluding r2, r11 and r12 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses
*** 581,591 **** R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! R30_H, R30, R31_H, R31 ); // Used to hold the TOC to avoid collisions with expanded DynamicCall // which uses r19 as inline cache internally and expanded LeafCall which uses --- 581,591 ---- R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! /*R30_H, R30,*/ // R30_zero R31_H, R31 ); // Used to hold the TOC to avoid collisions with expanded DynamicCall // which uses r19 as inline cache internally and expanded LeafCall which uses
*** 619,629 **** R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! R30_H, R30, R31_H, R31 ); // 64 bit registers that can only be read i.e. these registers can // only be src of all instructions. --- 619,629 ---- R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ ! /*R30_H, R30,*/ // R30_zero R31_H, R31 ); // 64 bit registers that can only be read i.e. these registers can // only be src of all instructions.
*** 656,666 **** R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ // TODO: let allocator handle TOC!! ! R30_H, R30, R31_H, R31 ); // ---------------------------- --- 656,666 ---- R25_H, R25, R26_H, R26, R27_H, R27, R28_H, R28, /*R29_H, R29,*/ // TODO: let allocator handle TOC!! ! /*R30_H, R30,*/ // R30_zero R31_H, R31 ); // ----------------------------
*** 2798,2819 **** // StoreStore barrier conditionally. __ lwz(R0, 0, $releaseFieldAddr$$Register); __ cmpwi($crx$$CondRegister, R0, 0); __ beq_predict_taken($crx$$CondRegister, skip_storestore); #endif - __ li(R0, 0); __ membar(Assembler::StoreStore); #if 0 // TODO: PPC port __ bind(skip_storestore); #endif // Do the store. if ($mem$$index == 0) { ! __ stb(R0, $mem$$disp, $mem$$base$$Register); } else { assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc"); ! __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register); } %} enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{ --- 2798,2818 ---- // StoreStore barrier conditionally. __ lwz(R0, 0, $releaseFieldAddr$$Register); __ cmpwi($crx$$CondRegister, R0, 0); __ beq_predict_taken($crx$$CondRegister, skip_storestore); #endif __ membar(Assembler::StoreStore); #if 0 // TODO: PPC port __ bind(skip_storestore); #endif // Do the store. if ($mem$$index == 0) { ! __ stb(R30_zero, $mem$$disp, $mem$$base$$Register); } else { assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc"); ! __ stbx(R30_zero, $mem$$base$$Register, $mem$$index$$Register); } %} enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
*** 6278,6287 **** --- 6277,6300 ---- __ stb($src$$Register, Idisp, $mem$$base$$Register); %} ins_pipe(pipe_class_memory); %} + instruct storeB_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreB mem zero)); + ins_cost(MEMORY_REF_COST); + + format %{ "STB 0, $mem \t// store 0 on a byte" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_stb); + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); + __ stb(R30_zero, Idisp, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); + %} + // Store Char/Short instruct storeC(memory mem, iRegIsrc src) %{ match(Set mem (StoreC mem src)); ins_cost(MEMORY_REF_COST);
*** 6293,6302 **** --- 6306,6329 ---- __ sth($src$$Register, Idisp, $mem$$base$$Register); %} ins_pipe(pipe_class_memory); %} + instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + ins_cost(MEMORY_REF_COST); + + format %{ "STH 0, $mem \t// store 0 on a short" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_sth); + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); + __ sth(R30_zero, Idisp, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); + %} + // Store Integer instruct storeI(memory mem, iRegIsrc src) %{ match(Set mem (StoreI mem src)); ins_cost(MEMORY_REF_COST);
*** 6304,6313 **** --- 6331,6353 ---- size(4); ins_encode( enc_stw(src, mem) ); ins_pipe(pipe_class_memory); %} + instruct storeI_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + ins_cost(MEMORY_REF_COST); + + format %{ "STW 0, $mem \t// store 0 on a word" %} + size(4); + ins_encode %{ + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); + __ stw(R30_zero, Idisp, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); + %} + // ConvL2I + StoreI. instruct storeI_convL2I(memory mem, iRegLsrc src) %{ match(Set mem (StoreI mem (ConvL2I src))); ins_cost(MEMORY_REF_COST);
*** 6326,6335 **** --- 6366,6390 ---- size(4); ins_encode( enc_std(src, mem) ); ins_pipe(pipe_class_memory); %} + instruct storeL_0(memoryAlg4 mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + ins_cost(MEMORY_REF_COST); + + format %{ "STD 0, $mem \t// store 0 on a long" %} + size(4); + ins_encode %{ + int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); + // Operand 'ds' requires 4-alignment. + assert((Idisp & 0x3) == 0, "unaligned offset"); + __ std(R30_zero, Idisp, $mem$$base$$Register); + %} + ins_pipe(pipe_class_memory); + %} + // Store super word nodes. // Store Aligned Packed Byte long register to memory instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{ predicate(n->as_StoreVector()->memory_size() == 8);
*** 6446,6459 **** format %{ "STB #0, $mem \t// CMS card-mark byte store (G1)" %} size(8); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ li(R0, 0); //__ release(); // G1: oops are allowed to get visible after dirty marking guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias"); ! __ stb(R0, $mem$$disp, $mem$$base$$Register); %} ins_pipe(pipe_class_memory); %} // Convert oop pointer into compressed form. --- 6501,6513 ---- format %{ "STB #0, $mem \t// CMS card-mark byte store (G1)" %} size(8); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); //__ release(); // G1: oops are allowed to get visible after dirty marking guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias"); ! __ stb(R30_zero, $mem$$disp, $mem$$base$$Register); %} ins_pipe(pipe_class_memory); %} // Convert oop pointer into compressed form.
*** 11294,11305 **** size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr); Label done; __ bns($crx$$CondRegister, done); // not unordered -> keep crx ! __ li(R0, 0); ! __ cmpwi($crx$$CondRegister, R0, 1); // unordered -> set crx to 'less' // TODO PPC port __ endgroup_if_needed(_size == 16); __ bind(done); %} ins_pipe(pipe_class_default); %} --- 11348,11358 ---- size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr); Label done; __ bns($crx$$CondRegister, done); // not unordered -> keep crx ! __ cmpwi($crx$$CondRegister, R30_zero, 1); // unordered -> set crx to 'less' // TODO PPC port __ endgroup_if_needed(_size == 16); __ bind(done); %} ins_pipe(pipe_class_default); %}
*** 12746,12797 **** match(Set cr0 (OverflowAddL op1 op2)); format %{ "add_ $op1, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ li(R0, 0); ! __ mtxer(R0); // clear XER.SO ! __ addo_(R0, $op1$$Register, $op2$$Register); %} ins_pipe(pipe_class_default); %} instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ match(Set cr0 (OverflowSubL op1 op2)); format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ li(R0, 0); ! __ mtxer(R0); // clear XER.SO ! __ subfo_(R0, $op2$$Register, $op1$$Register); %} ins_pipe(pipe_class_default); %} instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{ match(Set cr0 (OverflowSubL zero op2)); format %{ "nego_ R0, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ li(R0, 0); ! __ mtxer(R0); // clear XER.SO ! __ nego_(R0, $op2$$Register); %} ins_pipe(pipe_class_default); %} instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ match(Set cr0 (OverflowMulL op1 op2)); format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ li(R0, 0); ! __ mtxer(R0); // clear XER.SO ! __ mulldo_(R0, $op1$$Register, $op2$$Register); %} ins_pipe(pipe_class_default); %} --- 12799,12846 ---- match(Set cr0 (OverflowAddL op1 op2)); format %{ "add_ $op1, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ mtxer(R30_zero); // clear XER.SO ! __ addo_(R0, $op1$$Register, $op2$$Register); // result is discarded into scratch R0; R30_zero must never be written %} ins_pipe(pipe_class_default); %} instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ match(Set cr0 (OverflowSubL op1 op2)); format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! 
__ mtxer(R30_zero); // clear XER.SO ! __ subfo_(R0, $op2$$Register, $op1$$Register); // result is discarded into scratch R0; R30_zero must never be written %} ins_pipe(pipe_class_default); %} instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{ match(Set cr0 (OverflowSubL zero op2)); format %{ "nego_ R0, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ mtxer(R30_zero); // clear XER.SO ! __ nego_(R0, $op2$$Register); // result is discarded into scratch R0; R30_zero must never be written %} ins_pipe(pipe_class_default); %} instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ match(Set cr0 (OverflowMulL op1 op2)); format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); ! __ mtxer(R30_zero); // clear XER.SO ! __ mulldo_(R0, $op1$$Register, $op2$$Register); // result is discarded into scratch R0; R30_zero must never be written %} ins_pipe(pipe_class_default); %}
< prev index next >