< prev index next >

src/cpu/ppc/vm/ppc.ad

Print this page
rev 12310 : Reserve R30 to a cleared content register on C1 and C2 code

In several places a 0 is loaded into a register as a temporary value. This can
be improved by keeping a 0 cached in a dedicated register.

Applying only this patch showed no measurable performance drop, which suggests
that more registers are available than are normally needed, so one of them can
be dedicated to this caching technique.

Besides reserving R30_zero as a dedicated register that is initialized with 0
for the C1 and C2 code, new rules for storing 0 with stb, sth, stw and std were
added.

@@ -298,11 +298,10 @@
   R25, R25_H,
   R26, R26_H,
   R27, R27_H,
   R28, R28_H,
   R29, R29_H,
-  R30, R30_H,
   R31, R31_H,
 
   // scratch/special registers
   R11, R11_H,
   R12, R12_H,

@@ -316,10 +315,11 @@
   R5,  R5_H,
   R4,  R4_H,
   R3,  R3_H,
 
   // special registers, not available for allocation
+  R30, R30_H,     // R30_zero
   R16, R16_H,     // R16_thread
   R13, R13_H,     // system thread id
   R2,  R2_H,      // may be used for TOC
   R1,  R1_H,      // SP
   R0,  R0_H       // R0 (scratch)

@@ -446,11 +446,11 @@
   R25,
   R26,
   R27,
   R28,
 /*R29,*/             // global TOC
-  R30,
+/*R30,*/             // R30_zero
   R31
 );
 
 // 32 bit registers that can only be read i.e. these registers can
 // only be src of all instructions.

@@ -483,11 +483,11 @@
   R25,
   R26,
   R27,
   R28,
 /*R29,*/
-  R30,
+/*R30,*/            // R30_zero
   R31
 );
 
 reg_class rscratch1_bits32_reg(R11);
 reg_class rscratch2_bits32_reg(R12);

@@ -543,11 +543,11 @@
   R25_H, R25,
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
 /*R29_H, R29,*/
-  R30_H, R30,
+/*R30_H, R30,*/  // R30_zero
   R31_H, R31
 );
 
 // 64 bit registers used excluding r2, r11 and r12
 // Used to hold the TOC to avoid collisions with expanded LeafCall which uses

@@ -581,11 +581,11 @@
   R25_H, R25,
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
 /*R29_H, R29,*/
-  R30_H, R30,
+/*R30_H, R30,*/  // R30_zero
   R31_H, R31
 );
 
 // Used to hold the TOC to avoid collisions with expanded DynamicCall
 // which uses r19 as inline cache internally and expanded LeafCall which uses

@@ -619,11 +619,11 @@
   R25_H, R25,
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
 /*R29_H, R29,*/
-  R30_H, R30,
+/*R30_H, R30,*/  // R30_zero
   R31_H, R31
 );
 
 // 64 bit registers that can only be read i.e. these registers can
 // only be src of all instructions.

@@ -656,11 +656,11 @@
   R25_H, R25,
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
 /*R29_H, R29,*/ // TODO: let allocator handle TOC!!
-  R30_H, R30,
+/*R30_H, R30,*/  // R30_zero
   R31_H, R31
 );
 
 
 // ----------------------------

@@ -2798,22 +2798,21 @@
     // StoreStore barrier conditionally.
     __ lwz(R0, 0, $releaseFieldAddr$$Register);
     __ cmpwi($crx$$CondRegister, R0, 0);
     __ beq_predict_taken($crx$$CondRegister, skip_storestore);
 #endif
-    __ li(R0, 0);
     __ membar(Assembler::StoreStore);
 #if 0 // TODO: PPC port
     __ bind(skip_storestore);
 #endif
 
     // Do the store.
     if ($mem$$index == 0) {
-      __ stb(R0, $mem$$disp, $mem$$base$$Register);
+      __ stb(R30_zero, $mem$$disp, $mem$$base$$Register);
     } else {
       assert(0 == $mem$$disp, "no displacement possible with indexed load/stores on ppc");
-      __ stbx(R0, $mem$$base$$Register, $mem$$index$$Register);
+      __ stbx(R30_zero, $mem$$base$$Register, $mem$$index$$Register);
     }
   %}
 
   enc_class postalloc_expand_encode_oop(iRegNdst dst, iRegPdst src, flagsReg crx) %{
 

@@ -6278,10 +6277,24 @@
     __ stb($src$$Register, Idisp, $mem$$base$$Register);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
+instruct storeB_0(memory mem, immI_0 zero) %{
+  match(Set mem (StoreB mem zero)); // byte store whose value operand is the immI_0 constant
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STB     0, $mem \t// store 0 on a byte" %}
+  size(4); // exactly one store instruction is emitted below
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_stb);
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); // operand displacement plus the bias frame_slots_bias() returns for this base
+    __ stb(R30_zero, Idisp, $mem$$base$$Register); // value comes from R30_zero, the register this change reserves to hold 0
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // Store Char/Short
 instruct storeC(memory mem, iRegIsrc src) %{
   match(Set mem (StoreC mem src));
   ins_cost(MEMORY_REF_COST);
 

@@ -6293,10 +6306,24 @@
     __ sth($src$$Register, Idisp, $mem$$base$$Register);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
+instruct storeC_0(memory mem, immI_0 zero) %{
+  match(Set mem (StoreC mem zero)); // char/short store whose value operand is the immI_0 constant
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STH     0, $mem \t// store 0 on a short" %}
+  size(4); // exactly one store instruction is emitted below
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_sth);
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); // operand displacement plus the bias frame_slots_bias() returns for this base
+    __ sth(R30_zero, Idisp, $mem$$base$$Register); // value comes from R30_zero, the register this change reserves to hold 0
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // Store Integer
 instruct storeI(memory mem, iRegIsrc src) %{
   match(Set mem (StoreI mem src));
   ins_cost(MEMORY_REF_COST);
 

@@ -6304,10 +6331,23 @@
   size(4);
   ins_encode( enc_stw(src, mem) );
   ins_pipe(pipe_class_memory);
 %}
 
+instruct storeI_0(memory mem, immI_0 zero) %{
+  match(Set mem (StoreI mem zero)); // int store whose value operand is the immI_0 constant
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STW     0, $mem \t// store 0 on a word" %}
+  size(4); // exactly one store instruction is emitted below
+  ins_encode %{
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); // operand displacement plus the bias frame_slots_bias() returns for this base
+    __ stw(R30_zero, Idisp, $mem$$base$$Register); // value comes from R30_zero, the register this change reserves to hold 0
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // ConvL2I + StoreI.
 instruct storeI_convL2I(memory mem, iRegLsrc src) %{
   match(Set mem (StoreI mem (ConvL2I src)));
   ins_cost(MEMORY_REF_COST);
 

@@ -6326,10 +6366,25 @@
   size(4);
   ins_encode( enc_std(src, mem) );
   ins_pipe(pipe_class_memory);
 %}
 
+instruct storeL_0(memoryAlg4 mem, immL_0 zero) %{
+  match(Set mem (StoreL mem zero)); // long store whose value operand is the immL_0 constant
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STD     0, $mem \t// store 0 on a long" %}
+  size(4); // exactly one store instruction is emitted below
+  ins_encode %{
+    int Idisp = $mem$$disp + frame_slots_bias($mem$$base, ra_); // operand displacement plus the bias frame_slots_bias() returns for this base
+    // Operand 'ds' requires 4-alignment.
+    assert((Idisp & 0x3) == 0, "unaligned offset");
+    __ std(R30_zero, Idisp, $mem$$base$$Register); // value comes from R30_zero, the register this change reserves to hold 0
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // Store super word nodes.
 
 // Store Aligned Packed Byte long register to memory
 instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
   predicate(n->as_StoreVector()->memory_size() == 8);

@@ -6446,14 +6501,13 @@
 
   format %{ "STB     #0, $mem \t// CMS card-mark byte store (G1)" %}
   size(8);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ li(R0, 0);
     //__ release(); // G1: oops are allowed to get visible after dirty marking
     guarantee($mem$$base$$Register != R1_SP, "use frame_slots_bias");
-    __ stb(R0, $mem$$disp, $mem$$base$$Register);
+    __ stb(R30_zero, $mem$$disp, $mem$$base$$Register);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
 // Convert oop pointer into compressed form.

@@ -11294,12 +11348,11 @@
   size(false /* TODO: PPC PORT(InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 16 : 12);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmovecr);
     Label done;
     __ bns($crx$$CondRegister, done);        // not unordered -> keep crx
-    __ li(R0, 0);
-    __ cmpwi($crx$$CondRegister, R0, 1);     // unordered -> set crx to 'less'
+    __ cmpwi($crx$$CondRegister, R30_zero, 1); // unordered -> set crx to 'less'
     // TODO PPC port __ endgroup_if_needed(_size == 16);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}

@@ -12746,52 +12799,48 @@
   match(Set cr0 (OverflowAddL op1 op2));
 
   format %{ "add_    $op1, $op2\t# overflow check long" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ li(R0, 0);
-    __ mtxer(R0); // clear XER.SO
-    __ addo_(R0, $op1$$Register, $op2$$Register);
+    __ mtxer(R30_zero); // clear XER.SO
+    __ addo_(R30_zero, $op1$$Register, $op2$$Register);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
   match(Set cr0 (OverflowSubL op1 op2));
 
   format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ li(R0, 0);
-    __ mtxer(R0); // clear XER.SO
+    __ mtxer(R30_zero); // clear XER.SO; R30_zero is only read, the discarded result below must stay in scratch R0 so the cached 0 survives
     __ subfo_(R0, $op2$$Register, $op1$$Register);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
   match(Set cr0 (OverflowSubL zero op2));
 
   format %{ "nego_   R0, $op2\t# overflow check long" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ li(R0, 0);
-    __ mtxer(R0); // clear XER.SO
+    __ mtxer(R30_zero); // clear XER.SO; R30_zero is only read, the discarded result below must stay in scratch R0 so the cached 0 survives
     __ nego_(R0, $op2$$Register);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
   match(Set cr0 (OverflowMulL op1 op2));
 
   format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ li(R0, 0);
-    __ mtxer(R0); // clear XER.SO
+    __ mtxer(R30_zero); // clear XER.SO; R30_zero is only read, the discarded result below must stay in scratch R0 so the cached 0 survives
     __ mulldo_(R0, $op1$$Register, $op2$$Register);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 
< prev index next >