< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page
rev 13391 : Fix other issues pointed out by Martin Doerr.

http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2017-August/026899.html

Also took this chance to remove one register usage in squareToLen.


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 































































































































































































































































3309 
3310   /**
3311    * Arguments:
3312    *
3313    * Inputs:
3314    *   R3_ARG1    - int   crc
3315    *   R4_ARG2    - byte* buf
3316    *   R5_ARG3    - int   length (of buffer)
3317    *
3318    * scratch:
3319    *   R2, R6-R12
3320    *
3321    * Output:
3322    *   R3_RET     - int   crc result
3323    */
3324   // Compute CRC32 function.
3325   address generate_CRC32_updateBytes(const char* name) {
3326     __ align(CodeEntryAlignment);
3327     StubCodeMark mark(this, "StubRoutines", name);
3328     address start = __ function_entry();  // Remember stub start address (is rtn value).


3483     // support for verify_oop (must happen after universe_init)
3484     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3485 
3486     // arraycopy stubs used by compilers
3487     generate_arraycopy_stubs();
3488 
3489     // Safefetch stubs.
3490     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3491                                                        &StubRoutines::_safefetch32_fault_pc,
3492                                                        &StubRoutines::_safefetch32_continuation_pc);
3493     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494                                                        &StubRoutines::_safefetchN_fault_pc,
3495                                                        &StubRoutines::_safefetchN_continuation_pc);
3496 
3497 #ifdef COMPILER2
3498     if (UseMultiplyToLenIntrinsic) {
3499       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500     }
3501 #endif
3502 






3503     if (UseMontgomeryMultiplyIntrinsic) {
3504       StubRoutines::_montgomeryMultiply
3505         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506     }
3507     if (UseMontgomerySquareIntrinsic) {
3508       StubRoutines::_montgomerySquare
3509         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510     }
3511 
3512     if (UseAESIntrinsics) {
3513       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515     }
3516 
3517   }
3518 
3519  public:
3520   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521     // replace the standard masm with a special one:
3522     _masm = new MacroAssembler(code);


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 
3309   /**
3310   *  Arguments:
3311   *
3312   *  Input:
3313   *   R3_ARG1    - out address
3314   *   R4_ARG2    - in address
3315   *   R5_ARG3    - offset
3316   *   R6_ARG4    - len
3317   *   R7_ARG5    - k
3318   *  Output:
3319   *   R3_RET     - carry
3320   */
3321   address generate_mulAdd() {
         // Stub for the BigInteger.mulAdd intrinsic:
         //   out[offset..] += in[0..len-1] * k, returning the final carry in
         //   R3_RET (argument registers per the comment block above:
         //   R3=out, R4=in, R5=offset, R6=len, R7=k).
3322     __ align(CodeEntryAlignment);
3323     StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324 
3325     address start = __ function_entry();
3326 
3327     // C2 does not sign extend signed parameters to full 64 bits registers:
         // rldic: rotate offset left by 2 (int index -> byte offset, i.e. *4)
         // and clear the upper 32 bits in the same instruction.
3328     __ rldic (R5_ARG3, R5_ARG3, 2, 32);  // always positive
3329     __ clrldi(R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word
3330 
         // muladd does the multiply-accumulate loop; R8-R10 serve as
         // temporaries, and the final carry is left in R10 (moved to the
         // return register below).
3331     __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3332 
3333     // Moves output carry to return register
3334     __ mr    (R3_RET,  R10);
3335 
3336     __ blr();
3337 
3338     return start;
3339   }
3340 
3341   /**
3342   *  Arguments:
3343   *
3344   *  Input:
3345   *   R3_ARG1    - in address
3346   *   R4_ARG2    - in length
3347   *   R5_ARG3    - out address
3348   *   R6_ARG4    - out length
3349   */
3350   address generate_squareToLen() {
         // Stub for the BigInteger.squareToLen intrinsic: squares the int[]
         // magnitude at 'in' (in_len ints) into 'out' (out_len ints), and
         // returns 'out' in R3_RET.  Mirrors the Java reference algorithm in
         // three phases (see section comments below): store each piece's
         // square shifted right one bit, add in the off-diagonal products,
         // then shift everything left one bit and restore the low bit.
3351     __ align(CodeEntryAlignment);
3352     StubCodeMark mark(this, "StubRoutines", "squareToLen");
3353 
3354     address start = __ function_entry();
3355 
3356     // args
3357     const Register in        = R3_ARG1;
3358     const Register in_len    = R4_ARG2;
3359     const Register out       = R5_ARG3;
3360     const Register out_len   = R6_ARG4;
3361 
3362     // output
3363     const Register ret       = R3_RET;
3364 
3365     // temporaries
         // R14-R27 are non-volatile and are saved/restored frameless below.
3366     const Register lplw_s    = R7;
3367     const Register in_aux    = R8;
3368     const Register out_aux   = R9;
3369     const Register piece     = R10;
3370     const Register product   = R14;
3371     const Register lplw      = R15;
3372     const Register i_minus1  = R16;
3373     const Register carry     = R17;
3374     const Register offset    = R18;
3375     const Register off_aux   = R19;
3376     const Register t         = R20;
3377     const Register mlen      = R21;
3378     const Register len       = R22;
3379     const Register a         = R23;
3380     const Register b         = R24;
3381     const Register i         = R25;
3382     const Register c         = R26;
3383     const Register cs        = R27;
3384 
3385     // Labels
         // NOTE(review): SKIP_MULADD and LOOP_MULADD are declared but never
         // bound or branched to in this stub; likewise R28 is saved and
         // restored below although nothing here uses it.  Both look like
         // leftovers — confirm they can be removed.
3386     Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3387     Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3388 
3389     // Save non-volatile regs (frameless).
3390     int current_offs = -8;
3391     __ std(R28, current_offs, R1_SP); current_offs -= 8;
3392     __ std(R27, current_offs, R1_SP); current_offs -= 8;
3393     __ std(R26, current_offs, R1_SP); current_offs -= 8;
3394     __ std(R25, current_offs, R1_SP); current_offs -= 8;
3395     __ std(R24, current_offs, R1_SP); current_offs -= 8;
3396     __ std(R23, current_offs, R1_SP); current_offs -= 8;
3397     __ std(R22, current_offs, R1_SP); current_offs -= 8;
3398     __ std(R21, current_offs, R1_SP); current_offs -= 8;
3399     __ std(R20, current_offs, R1_SP); current_offs -= 8;
3400     __ std(R19, current_offs, R1_SP); current_offs -= 8;
3401     __ std(R18, current_offs, R1_SP); current_offs -= 8;
3402     __ std(R17, current_offs, R1_SP); current_offs -= 8;
3403     __ std(R16, current_offs, R1_SP); current_offs -= 8;
3404     __ std(R15, current_offs, R1_SP); current_offs -= 8;
3405     __ std(R14, current_offs, R1_SP);
3406 
3407     // Store the squares, right shifted one bit (i.e., divided by 2)
         // out_aux/in_aux are pre-decremented so the lwzu/stdu update forms
         // below can step through both arrays.
3408     __ subi   (out_aux,   out,       8);
3409     __ subi   (in_aux,    in,        4);
3410     __ cmpwi  (CCR0,      in_len,    0);
3411     // Initialize lplw outside of the loop
3412     __ xorr   (lplw,      lplw,      lplw);
3413     __ ble    (CCR0,      SKIP_LOOP_SQUARE);    // in_len <= 0
3414     __ mtctr  (in_len);
3415 
3416     __ bind(LOOP_SQUARE);
         // For each 32-bit piece: square it to a 64-bit product, shift right
         // one, and carry the previous product's dropped low bit (kept in
         // lplw) into the MSB of the stored word.
3417     __ lwzu   (piece,     4,         in_aux);
3418     __ mulld  (product,   piece,     piece);
3419     // shift left 63 bits and only keep the MSB
3420     __ rldic  (lplw_s,    lplw,      63, 0);
3421     __ mr     (lplw,      product);
3422     // shift right 1 bit without sign extension
3423     __ srdi   (product,   product,   1);
3424     // join them to the same register and store it as Little Endian
3425     __ orr    (product,   lplw_s,    product);
3426     __ rldicl (product,   product,   32, 0);
3427     __ stdu   (product,   8,         out_aux);
3428     __ bdnz   (LOOP_SQUARE);
3429 
3430     __ bind(SKIP_LOOP_SQUARE);
3431 
3432     // Add in off-diagonal sums
3433     __ cmpwi  (CCR0,      in_len,    0);
3434     __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
3435     // Avoid CTR usage here in order to use it at mulAdd
3436     __ subi   (i_minus1,  in_len,    1);
3437     __ li     (offset,    4);
3438 
3439     __ bind(LOOP_DIAGONAL_SUM);
3440 
         // off_aux = out_len*4 - offset: byte offset into 'out' for this row.
3441     __ sldi   (off_aux,   out_len,   2);
3442     __ sub    (off_aux,   off_aux,   offset);
3443 
3444     __ mr     (len,       i_minus1);
3445     __ sldi   (mlen,      i_minus1,  2);
3446     __ lwzx   (t,         in,        mlen);
3447 
         // out[off_aux..] += in[0..len-1] * t; carry receives the overflow.
3448     __ muladd (out, in, off_aux, len, t, a, b, carry);
3449 
3450     // begin<addOne>
3451     // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3452     __ addi   (mlen,      mlen,      4);
3453     __ sldi   (a,         out_len,   2);
3454     __ subi   (a,         a,         4);
3455     __ sub    (a,         a,         mlen);
3456     __ subi   (off_aux,   offset,    4);
3457     __ sub    (off_aux,   a,         off_aux);
3458 
         // Add the muladd carry into the next higher word of 'out'.
3459     __ lwzx   (b,         off_aux,   out);
3460     __ add    (b,         b,         carry);
3461     __ stwx   (b,         off_aux,   out);
3462 
3463     // if (((uint64_t)s >> 32) != 0) {
3464     __ srdi_  (a,         b,         32);
3465     __ beq    (CCR0,      SKIP_ADDONE);
3466 
3467     // while (--mlen >= 0) {
         // Propagate the carry through successively higher words until it is
         // absorbed (stored word != 0) or we run off the number.
3468     __ bind(LOOP_ADDONE);
3469     __ subi   (mlen,      mlen,      4);
3470     __ cmpwi  (CCR0,      mlen,      0);
3471     __ beq    (CCR0,      SKIP_ADDONE);
3472 
3473     // if (--offset_aux < 0) { // Carry out of number
3474     __ subi   (off_aux,   off_aux,   4);
3475     __ cmpwi  (CCR0,      off_aux,   0);
3476     __ blt    (CCR0,      SKIP_ADDONE);
3477 
3478     // } else {
3479     __ lwzx   (b,         off_aux,   out);
3480     __ addi   (b,         b,         1);
3481     __ stwx   (b,         off_aux,   out);
3482     __ cmpwi  (CCR0,      b,         0);
3483     __ bne    (CCR0,      SKIP_ADDONE);
3484     __ b      (LOOP_ADDONE);
3485 
3486     __ bind(SKIP_ADDONE);
3487     // } } } end<addOne>
3488 
3489     __ addi   (offset,    offset,    8);
3490     __ subi   (i_minus1,  i_minus1,  1);
3491     __ cmpwi  (CCR0,      i_minus1,  0);
3492     __ bge    (CCR0,      LOOP_DIAGONAL_SUM);
3493 
3494     __ bind(SKIP_DIAGONAL_SUM);
3495 
3496     // Shift back up and set low bit
3497     // Shifts 1 bit left up to len positions. Assumes no leading zeros
3498     // begin<primitiveLeftShift>
3499     __ cmpwi  (CCR0,      out_len,   0);
3500     __ ble    (CCR0,      SKIP_LSHIFT);
3501     __ li     (i,         0);
3502     __ lwz    (c,         0,         out);
3503     __ subi   (b,         out_len,   1);
3504     __ mtctr  (b);
3505 
3506     __ bind(LOOP_LSHIFT);
         // out[i] = (out[i] << 1) | (out[i+1] >> 31), word by word.
3507     __ mr     (b,         c);
3508     __ addi   (cs,        i,         4);
3509     __ lwzx   (c,         out,       cs);
3510 
3511     __ sldi   (b,         b,         1);
3512     __ srwi   (cs,        c,         31);
3513     __ orr    (b,         b,         cs);
3514     __ stwx   (b,         i,         out);
3515 
3516     __ addi   (i,         i,         4);
3517     __ bdnz   (LOOP_LSHIFT);
3518 
         // Last word has no neighbor to pull a bit from: plain shift left 1.
3519     __ sldi   (c,         out_len,   2);
3520     __ subi   (c,         c,         4);
3521     __ lwzx   (b,         out,       c);
3522     __ sldi   (b,         b,         1);
3523     __ stwx   (b,         out,       c);
3524 
3525     __ bind(SKIP_LSHIFT);
3526     // end<primitiveLeftShift>
3527 
3528     // Set low bit
         // out[out_len-1] |= in[in_len-1] & 1 — restores the bit dropped by
         // the initial right shift of the squares.
3529     __ sldi   (i,         in_len,    2);
3530     __ subi   (i,         i,         4);
3531     __ lwzx   (i,         in,        i);
3532     __ sldi   (c,         out_len,   2);
3533     __ subi   (c,         c,         4);
3534     __ lwzx   (b,         out,       c);
3535 
3536     __ andi   (i,         i,         1);
3537     __ orr    (i,         b,         i);
3538 
3539     __ stwx   (i,         out,       c);
3540 
3541     // Restore non-volatile regs.
3542     current_offs = -8;
3543     __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3544     __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3545     __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3546     __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3547     __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3548     __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3549     __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3550     __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3551     __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3552     __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3553     __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3554     __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3555     __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3556     __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3557     __ ld(R14, current_offs, R1_SP);
3558 
         // Result: return the 'out' address in R3_RET.
3559     __ mr(ret, out);
3560     __ blr();
3561 
3562     return start;
3563   }
3564 
3565   /**
3566    * Arguments:
3567    *
3568    * Inputs:
3569    *   R3_ARG1    - int   crc
3570    *   R4_ARG2    - byte* buf
3571    *   R5_ARG3    - int   length (of buffer)
3572    *
3573    * scratch:
3574    *   R2, R6-R12
3575    *
3576    * Output:
3577    *   R3_RET     - int   crc result
3578    */
3579   // Compute CRC32 function.
3580   address generate_CRC32_updateBytes(const char* name) {
3581     __ align(CodeEntryAlignment);
3582     StubCodeMark mark(this, "StubRoutines", name);
3583     address start = __ function_entry();  // Remember stub start address (is rtn value).


3738     // support for verify_oop (must happen after universe_init)
3739     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3740 
3741     // arraycopy stubs used by compilers
3742     generate_arraycopy_stubs();
3743 
3744     // Safefetch stubs.
3745     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3746                                                        &StubRoutines::_safefetch32_fault_pc,
3747                                                        &StubRoutines::_safefetch32_continuation_pc);
3748     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3749                                                        &StubRoutines::_safefetchN_fault_pc,
3750                                                        &StubRoutines::_safefetchN_continuation_pc);
3751 
3752 #ifdef COMPILER2
3753     if (UseMultiplyToLenIntrinsic) {
3754       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3755     }
3756 #endif
3757 
3758     if (UseSquareToLenIntrinsic) {
3759       StubRoutines::_squareToLen = generate_squareToLen();
3760     }
3761     if (UseMulAddIntrinsic) {
3762       StubRoutines::_mulAdd = generate_mulAdd();
3763     }
3764     if (UseMontgomeryMultiplyIntrinsic) {
3765       StubRoutines::_montgomeryMultiply
3766         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3767     }
3768     if (UseMontgomerySquareIntrinsic) {
3769       StubRoutines::_montgomerySquare
3770         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3771     }
3772 
3773     if (UseAESIntrinsics) {
3774       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3775       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3776     }
3777 
3778   }
3779 
3780  public:
3781   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3782     // replace the standard masm with a special one:
3783     _masm = new MacroAssembler(code);
< prev index next >