< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page
rev 13390 : Fix issues pointed out by Martin Doerr.

http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2017-August/026850.html


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 
































































































































































































































































3309 
3310   /**
3311    * Arguments:
3312    *
3313    * Inputs:
3314    *   R3_ARG1    - int   crc
3315    *   R4_ARG2    - byte* buf
3316    *   R5_ARG3    - int   length (of buffer)
3317    *
3318    * scratch:
3319    *   R2, R6-R12
3320    *
3321    * Output:
3322    *   R3_RET     - int   crc result
3323    */
3324   // Compute CRC32 function.
3325   address generate_CRC32_updateBytes(const char* name) {
3326     __ align(CodeEntryAlignment);
3327     StubCodeMark mark(this, "StubRoutines", name);
3328     address start = __ function_entry();  // Remember stub start address (is rtn value).


3483     // support for verify_oop (must happen after universe_init)
3484     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3485 
3486     // arraycopy stubs used by compilers
3487     generate_arraycopy_stubs();
3488 
3489     // Safefetch stubs.
3490     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3491                                                        &StubRoutines::_safefetch32_fault_pc,
3492                                                        &StubRoutines::_safefetch32_continuation_pc);
3493     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494                                                        &StubRoutines::_safefetchN_fault_pc,
3495                                                        &StubRoutines::_safefetchN_continuation_pc);
3496 
3497 #ifdef COMPILER2
3498     if (UseMultiplyToLenIntrinsic) {
3499       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500     }
3501 #endif
3502 






3503     if (UseMontgomeryMultiplyIntrinsic) {
3504       StubRoutines::_montgomeryMultiply
3505         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506     }
3507     if (UseMontgomerySquareIntrinsic) {
3508       StubRoutines::_montgomerySquare
3509         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510     }
3511 
3512     if (UseAESIntrinsics) {
3513       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515     }
3516 
3517   }
3518 
3519  public:
3520   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521     // replace the standard masm with a special one:
3522     _masm = new MacroAssembler(code);


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 
3309   /**
3310   *  Arguments:
3311   *
3312   *  Input:
3313   *   R3_ARG1    - out address
3314   *   R4_ARG2    - in address
3315   *   R5_ARG3    - offset
3316   *   R6_ARG4    - len
3317   *   R7_ARG5    - k
3318   *  Output:
3319   *   R3_RET     - carry
3320   */
3321   address generate_mulAdd() {
3322     __ align(CodeEntryAlignment);
3323     StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324 
3325     address start = __ function_entry();
3326 
3327     // C2 does not sign extend signed parameters to full 64 bits registers:
3328     __ rldicl (R5_ARG3, R5_ARG3, 2, 32);  // always positive
3329     __ clrldi (R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word
3330 
3331     __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3332 
3333     // Moves output carry to return register
3334     __ mr(R3_RET, R10);
3335 
3336     __ blr();
3337 
3338     return start;
3339   }
3340 
3341   /**
3342   *  Arguments:
3343   *
3344   *  Input:
3345   *   R3_ARG1    - in address
3346   *   R4_ARG2    - in length
3347   *   R5_ARG3    - out address
3348   *   R6_ARG4    - out length
3349   */
3350   address generate_squareToLen() {
3351     __ align(CodeEntryAlignment);
3352     StubCodeMark mark(this, "StubRoutines", "squareToLen");
3353 
3354     address start = __ function_entry();
3355 
3356     // args
3357     const Register in        = R3_ARG1;
3358     const Register in_len    = R4_ARG2;
3359     const Register out       = R5_ARG3;
3360     const Register out_len   = R6_ARG4;
3361 
3362     // output
3363     const Register ret       = R3_RET;
3364 
3365     // temporaries
3366     const Register lplw_s    = R7;
3367     const Register in_aux    = R8;
3368     const Register out_aux   = R9;
3369     const Register piece     = R10;
3370     const Register product   = R14;
3371     const Register product_s = R15;
3372     const Register lplw      = R16;
3373     const Register i_minus1  = R17;
3374     const Register carry     = R18;
3375     const Register offset    = R19;
3376     const Register off_aux   = R20;
3377     const Register t         = R21;
3378     const Register mlen      = R22;
3379     const Register len       = R23;
3380     const Register a         = R24;
3381     const Register b         = R25;
3382     const Register i         = R26;
3383     const Register c         = R27;
3384     const Register cs        = R28;
3385 
3386     // Labels
3387     Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3388     Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3389 
3390     // Save non-volatile regs (frameless).
3391     int current_offs = -8;
3392     __ std(R28, current_offs, R1_SP); current_offs -= 8;
3393     __ std(R27, current_offs, R1_SP); current_offs -= 8;
3394     __ std(R26, current_offs, R1_SP); current_offs -= 8;
3395     __ std(R25, current_offs, R1_SP); current_offs -= 8;
3396     __ std(R24, current_offs, R1_SP); current_offs -= 8;
3397     __ std(R23, current_offs, R1_SP); current_offs -= 8;
3398     __ std(R22, current_offs, R1_SP); current_offs -= 8;
3399     __ std(R21, current_offs, R1_SP); current_offs -= 8;
3400     __ std(R20, current_offs, R1_SP); current_offs -= 8;
3401     __ std(R19, current_offs, R1_SP); current_offs -= 8;
3402     __ std(R18, current_offs, R1_SP); current_offs -= 8;
3403     __ std(R17, current_offs, R1_SP); current_offs -= 8;
3404     __ std(R16, current_offs, R1_SP); current_offs -= 8;
3405     __ std(R15, current_offs, R1_SP); current_offs -= 8;
3406     __ std(R14, current_offs, R1_SP);
3407 
3408     // Store the squares, right shifted one bit (i.e., divided by 2)
3409     __ subi   (out_aux,   out,       8);
3410     __ subi   (in_aux,    in,        4);
3411     __ cmpwi  (CCR0,      in_len,    0);
3412     // Initialize lplw outside of the loop
3413     __ xorr   (lplw,      lplw,      lplw);
3414     __ ble    (CCR0,      SKIP_LOOP_SQUARE);    // in_len <= 0
3415     __ mtctr  (in_len);
3416 
3417     __ bind(LOOP_SQUARE);
3418     __ lwzu   (piece,     4,         in_aux);
3419     __ mulld  (product,   piece,     piece);
3420     // shift left 31 bits and only keep the 31th bit
3421     __ rlwinm (lplw_s,    lplw,      31, 0, 0);
3422     // shift right 33 bits without sign extension
3423     __ srdi   (product_s, product,   33);
3424     __ orr    (product_s, lplw_s,    product_s);
3425     __ mr     (lplw,      product);
3426     __ rldicl (product,   product,   31, 31);
3427     __ orr    (product,   product,   product_s);
3428     __ stdu   (product,   8,         out_aux);
3429     __ bdnz   (LOOP_SQUARE);
3430 
3431     __ bind(SKIP_LOOP_SQUARE);
3432 
3433     // Add in off-diagonal sums
3434     __ cmpwi  (CCR0,      in_len,    0);
3435     __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
3436     // Avoid CTR usage here in order to use it at mulAdd
3437     __ subi   (i_minus1,  in_len,    1);
3438     __ li     (offset,    4);
3439 
3440     __ bind(LOOP_DIAGONAL_SUM);
3441 
3442     __ sldi   (off_aux,   out_len,   2);
3443     __ sub    (off_aux,   off_aux,   offset);
3444 
3445     __ mr     (len,       i_minus1);
3446     __ sldi   (mlen,      i_minus1,  2);
3447     __ lwzx   (t,         in,        mlen);
3448 
3449     __ muladd (out, in, off_aux, len, t, a, b, carry);
3450 
3451     // begin<addOne>
3452     // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3453     __ addi   (mlen,      mlen,      4);
3454     __ sldi   (a,         out_len,   2);
3455     __ subi   (a,         a,         4);
3456     __ sub    (a,         a,         mlen);
3457     __ subi   (off_aux,   offset,    4);
3458     __ sub    (off_aux,   a,         off_aux);
3459 
3460     __ lwzx   (b,         off_aux,   out);
3461     __ add    (b,         b,         carry);
3462     __ stwx   (b,         off_aux,   out);
3463 
3464     // if (((uint64_t)s >> 32) != 0) {
3465     __ srdi_  (a,         b,         32);
3466     __ beq    (CCR0,      SKIP_ADDONE);
3467 
3468     // while (--mlen >= 0) {
3469     __ bind(LOOP_ADDONE);
3470     __ subi   (mlen,      mlen,      4);
3471     __ cmpwi  (CCR0,      mlen,      0);
3472     __ beq    (CCR0,      SKIP_ADDONE);
3473 
3474     // if (--offset_aux < 0) { // Carry out of number
3475     __ subi   (off_aux,   off_aux,   4);
3476     __ cmpwi  (CCR0,      off_aux,   0);
3477     __ blt    (CCR0,      SKIP_ADDONE);
3478 
3479     // } else {
3480     __ lwzx   (b,         off_aux,   out);
3481     __ addi   (b,         b,         1);
3482     __ stwx   (b,         off_aux,   out);
3483     __ cmpwi  (CCR0,      b,         0);
3484     __ bne    (CCR0,      SKIP_ADDONE);
3485     __ b      (LOOP_ADDONE);
3486 
3487     __ bind(SKIP_ADDONE);
3488     // } } } end<addOne>
3489 
3490     __ addi   (offset,    offset,    8);
3491     __ subi   (i_minus1,  i_minus1,  1);
3492     __ cmpwi  (CCR0,      i_minus1,  0);
3493     __ bge    (CCR0,      LOOP_DIAGONAL_SUM);
3494 
3495     __ bind(SKIP_DIAGONAL_SUM);
3496 
3497     // Shift back up and set low bit
3498     // Shifts 1 bit left up to len positions. Assumes no leading zeros
3499     // begin<primitiveLeftShift>
3500     __ cmpwi  (CCR0,      out_len,   0);
3501     __ ble    (CCR0,      SKIP_LSHIFT);
3502     __ li     (i,         0);
3503     __ lwz    (c,         0,         out);
3504     __ subi   (b,         out_len,   1);
3505     __ mtctr  (b);
3506 
3507     __ bind(LOOP_LSHIFT);
3508     __ mr     (b,         c);
3509     __ addi   (cs,        i,         4);
3510     __ lwzx   (c,         out,       cs);
3511 
3512     __ sldi   (b,         b,         1);
3513     __ srwi   (cs,        c,         31);
3514     __ orr    (b,         b,         cs);
3515     __ stwx   (b,         i,         out);
3516 
3517     __ addi   (i,         i,         4);
3518     __ bdnz   (LOOP_LSHIFT);
3519 
3520     __ sldi   (c,         out_len,   2);
3521     __ subi   (c,         c,         4);
3522     __ lwzx   (b,         out,       c);
3523     __ sldi   (b,         b,         1);
3524     __ stwx   (b,         out,       c);
3525 
3526     __ bind(SKIP_LSHIFT);
3527     // end<primitiveLeftShift>
3528 
3529     // Set low bit
3530     __ sldi   (i,         in_len,    2);
3531     __ subi   (i,         i,         4);
3532     __ lwzx   (i,         in,        i);
3533     __ sldi   (c,         out_len,   2);
3534     __ subi   (c,         c,         4);
3535     __ lwzx   (b,         out,       c);
3536 
3537     __ andi   (i,         i,         1);
3538     __ orr    (i,         b,         i);
3539 
3540     __ stwx   (i,         out,       c);
3541 
3542     // Restore non-volatile regs.
3543     current_offs = -8;
3544     __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3545     __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3546     __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3547     __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3548     __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3549     __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3550     __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3551     __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3552     __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3553     __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3554     __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3555     __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3556     __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3557     __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3558     __ ld(R14, current_offs, R1_SP);
3559 
3560     __ mr(ret, out);
3561     __ blr();
3562 
3563     return start;
3564   }
3565 
3566   /**
3567    * Arguments:
3568    *
3569    * Inputs:
3570    *   R3_ARG1    - int   crc
3571    *   R4_ARG2    - byte* buf
3572    *   R5_ARG3    - int   length (of buffer)
3573    *
3574    * scratch:
3575    *   R2, R6-R12
3576    *
3577    * Output:
3578    *   R3_RET     - int   crc result
3579    */
3580   // Compute CRC32 function.
3581   address generate_CRC32_updateBytes(const char* name) {
3582     __ align(CodeEntryAlignment);
3583     StubCodeMark mark(this, "StubRoutines", name);
3584     address start = __ function_entry();  // Remember stub start address (is rtn value).


3739     // support for verify_oop (must happen after universe_init)
3740     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3741 
3742     // arraycopy stubs used by compilers
3743     generate_arraycopy_stubs();
3744 
3745     // Safefetch stubs.
3746     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3747                                                        &StubRoutines::_safefetch32_fault_pc,
3748                                                        &StubRoutines::_safefetch32_continuation_pc);
3749     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3750                                                        &StubRoutines::_safefetchN_fault_pc,
3751                                                        &StubRoutines::_safefetchN_continuation_pc);
3752 
3753 #ifdef COMPILER2
3754     if (UseMultiplyToLenIntrinsic) {
3755       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3756     }
3757 #endif
3758 
3759     if (UseSquareToLenIntrinsic) {
3760       StubRoutines::_squareToLen = generate_squareToLen();
3761     }
3762     if (UseMulAddIntrinsic) {
3763       StubRoutines::_mulAdd = generate_mulAdd();
3764     }
3765     if (UseMontgomeryMultiplyIntrinsic) {
3766       StubRoutines::_montgomeryMultiply
3767         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3768     }
3769     if (UseMontgomerySquareIntrinsic) {
3770       StubRoutines::_montgomerySquare
3771         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3772     }
3773 
3774     if (UseAESIntrinsics) {
3775       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3776       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3777     }
3778 
3779   }
3780 
3781  public:
3782   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3783     // replace the standard masm with a special one:
3784     _masm = new MacroAssembler(code);
< prev index next >