< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page
rev 13392 : Clear int to long as Martin suggested

http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2017-August/027009.html


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 


































































































































































































































































3309 
3310   /**
3311    * Arguments:
3312    *
3313    * Inputs:
3314    *   R3_ARG1    - int   crc
3315    *   R4_ARG2    - byte* buf
3316    *   R5_ARG3    - int   length (of buffer)
3317    *
3318    * scratch:
3319    *   R2, R6-R12
3320    *
3321    * Output:
3322    *   R3_RET     - int   crc result
3323    */
3324   // Compute CRC32 function.
3325   address generate_CRC32_updateBytes(const char* name) {
3326     __ align(CodeEntryAlignment);
3327     StubCodeMark mark(this, "StubRoutines", name);
3328     address start = __ function_entry();  // Remember stub start address (is rtn value).


3483     // support for verify_oop (must happen after universe_init)
3484     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3485 
3486     // arraycopy stubs used by compilers
3487     generate_arraycopy_stubs();
3488 
3489     // Safefetch stubs.
3490     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3491                                                        &StubRoutines::_safefetch32_fault_pc,
3492                                                        &StubRoutines::_safefetch32_continuation_pc);
3493     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494                                                        &StubRoutines::_safefetchN_fault_pc,
3495                                                        &StubRoutines::_safefetchN_continuation_pc);
3496 
3497 #ifdef COMPILER2
3498     if (UseMultiplyToLenIntrinsic) {
3499       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500     }
3501 #endif
3502 






3503     if (UseMontgomeryMultiplyIntrinsic) {
3504       StubRoutines::_montgomeryMultiply
3505         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506     }
3507     if (UseMontgomerySquareIntrinsic) {
3508       StubRoutines::_montgomerySquare
3509         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510     }
3511 
3512     if (UseAESIntrinsics) {
3513       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515     }
3516 
3517   }
3518 
3519  public:
3520   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521     // replace the standard masm with a special one:
3522     _masm = new MacroAssembler(code);


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 
3309   /**
3310   *  Arguments:
3311   *
3312   *  Input:
3313   *   R3_ARG1    - out address
3314   *   R4_ARG2    - in address
3315   *   R5_ARG3    - offset
3316   *   R6_ARG4    - len
3317   *   R7_ARG5    - k
3318   *  Output:
3319   *   R3_RET     - carry
3320   */
       // Stub for the mulAdd intrinsic (java.math.BigInteger::implMulAdd).
       // The multiply-accumulate loop itself is emitted by MacroAssembler::muladd;
       // this wrapper only normalizes the 32-bit Java int arguments to clean
       // 64-bit register values and moves the resulting carry into R3_RET.
3321   address generate_mulAdd() {
3322     __ align(CodeEntryAlignment);
3323     StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324 
3325     address start = __ function_entry();
3326 
3327     // C2 does not sign extend signed parameters to full 64 bits registers:
         // offset: rotate left by 2 (scale the int element index by 4 bytes)
         // while the rldic mask clears the upper 32 bits, yielding a
         // zero-extended byte offset (offset is a non-negative int index).
3328     __ rldic (R5_ARG3, R5_ARG3, 2, 32);  // always positive
3329     __ clrldi(R6_ARG4, R6_ARG4, 32);     // force zero bits on higher word
3330     __ clrldi(R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word
3331 
         // muladd(out, in, offset, len, k, tmp1, tmp2, carry): R8/R9 are
         // scratch, R10 receives the final carry (moved to the result below).
3332     __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3333 
3334     // Moves output carry to return register
3335     __ mr    (R3_RET,  R10);
3336 
3337     __ blr();
3338 
3339     return start;
3340   }
3341 
3342   /**
3343   *  Arguments:
3344   *
3345   *  Input:
3346   *   R3_ARG1    - in address
3347   *   R4_ARG2    - in length
3348   *   R5_ARG3    - out address
3349   *   R6_ARG4    - out length
3350   */
       // Stub for the squareToLen intrinsic (java.math.BigInteger::implSquareToLen):
       // computes out = in * in using the classic squaring decomposition
       // (mirrors the Java reference implementation): (1) store each word's
       // square right-shifted by one bit, (2) add in the off-diagonal products
       // via muladd with carry propagation (addOne), (3) shift the whole result
       // left one bit, (4) OR in the low bit of the last input word's square.
       // 'in' and 'out' are int[] magnitudes; lengths are in ints.
3351   address generate_squareToLen() {
3352     __ align(CodeEntryAlignment);
3353     StubCodeMark mark(this, "StubRoutines", "squareToLen");
3354 
3355     address start = __ function_entry();
3356 
3357     // args - higher word is cleaned (unsignedly) due to int to long casting
3358     const Register in        = R3_ARG1;
3359     const Register in_len    = R4_ARG2;
3360     __ clrldi(in_len, in_len, 32);
3361     const Register out       = R5_ARG3;
3362     const Register out_len   = R6_ARG4;
3363     __ clrldi(out_len, out_len, 32);
3364 
3365     // output
3366     const Register ret       = R3_RET;
3367 
3368     // temporaries
3369     const Register lplw_s    = R7;
3370     const Register in_aux    = R8;
3371     const Register out_aux   = R9;
3372     const Register piece     = R10;
3373     const Register product   = R14;
3374     const Register lplw      = R15;
3375     const Register i_minus1  = R16;
3376     const Register carry     = R17;
3377     const Register offset    = R18;
3378     const Register off_aux   = R19;
3379     const Register t         = R20;
3380     const Register mlen      = R21;
3381     const Register len       = R22;
3382     const Register a         = R23;
3383     const Register b         = R24;
3384     const Register i         = R25;
3385     const Register c         = R26;
3386     const Register cs        = R27;
3387 
3388     // Labels
3389     Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3390     Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3391 
3392     // Save non-volatile regs (frameless).
         // R14-R28 are spilled below the SP without allocating a frame; the
         // matching reloads at the end restore them in the same order.
3393     int current_offs = -8;
3394     __ std(R28, current_offs, R1_SP); current_offs -= 8;
3395     __ std(R27, current_offs, R1_SP); current_offs -= 8;
3396     __ std(R26, current_offs, R1_SP); current_offs -= 8;
3397     __ std(R25, current_offs, R1_SP); current_offs -= 8;
3398     __ std(R24, current_offs, R1_SP); current_offs -= 8;
3399     __ std(R23, current_offs, R1_SP); current_offs -= 8;
3400     __ std(R22, current_offs, R1_SP); current_offs -= 8;
3401     __ std(R21, current_offs, R1_SP); current_offs -= 8;
3402     __ std(R20, current_offs, R1_SP); current_offs -= 8;
3403     __ std(R19, current_offs, R1_SP); current_offs -= 8;
3404     __ std(R18, current_offs, R1_SP); current_offs -= 8;
3405     __ std(R17, current_offs, R1_SP); current_offs -= 8;
3406     __ std(R16, current_offs, R1_SP); current_offs -= 8;
3407     __ std(R15, current_offs, R1_SP); current_offs -= 8;
3408     __ std(R14, current_offs, R1_SP);
3409 
3410     // Store the squares, right shifted one bit (i.e., divided by 2)
         // Phase 1: for each 32-bit input word, compute its 64-bit square and
         // store it shifted right one bit; the bit shifted out of one product
         // becomes the MSB of the next stored doubleword (carried in 'lplw').
         // Pointers are pre-decremented so lwzu/stdu update them per iteration.
3411     __ subi   (out_aux,   out,       8);
3412     __ subi   (in_aux,    in,        4);
3413     __ cmpwi  (CCR0,      in_len,    0);
3414     // Initialize lplw outside of the loop
3415     __ xorr   (lplw,      lplw,      lplw);
3416     __ ble    (CCR0,      SKIP_LOOP_SQUARE);    // in_len <= 0
3417     __ mtctr  (in_len);
3418 
3419     __ bind(LOOP_SQUARE);
3420     __ lwzu   (piece,     4,         in_aux);
3421     __ mulld  (product,   piece,     piece);
3422     // shift left 63 bits and only keep the MSB
3423     __ rldic  (lplw_s,    lplw,      63, 0);
3424     __ mr     (lplw,      product);
3425     // shift right 1 bit without sign extension
3426     __ srdi   (product,   product,   1);
3427     // join them to the same register and store it as Little Endian
         // rldicl by 32 swaps the two 32-bit halves so the doubleword store
         // matches the int[] word order expected by the Java layout.
3428     __ orr    (product,   lplw_s,    product);
3429     __ rldicl (product,   product,   32, 0);
3430     __ stdu   (product,   8,         out_aux);
3431     __ bdnz   (LOOP_SQUARE);
3432 
3433     __ bind(SKIP_LOOP_SQUARE);
3434 
3435     // Add in off-diagonal sums
         // Phase 2: for each word in[i] (i from in_len-1 down to 0), multiply-
         // accumulate in[0..i) * in[i] into 'out' (via muladd), then propagate
         // the resulting carry upward through the output words (addOne).
3436     __ cmpwi  (CCR0,      in_len,    0);
3437     __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
3438     // Avoid CTR usage here in order to use it at mulAdd
3439     __ subi   (i_minus1,  in_len,    1);
3440     __ li     (offset,    4);
3441 
3442     __ bind(LOOP_DIAGONAL_SUM);
3443 
         // off_aux = out_len*4 - offset: byte offset of the muladd destination.
3444     __ sldi   (off_aux,   out_len,   2);
3445     __ sub    (off_aux,   off_aux,   offset);
3446 
         // t = in[i_minus1] (the multiplier word); mlen = i_minus1*4 bytes.
3447     __ mr     (len,       i_minus1);
3448     __ sldi   (mlen,      i_minus1,  2);
3449     __ lwzx   (t,         in,        mlen);
3450 
         // muladd(out, in, offset, len, k, tmp1, tmp2, carry) — same helper as
         // the mulAdd stub; 'carry' receives the top word to propagate.
3451     __ muladd (out, in, off_aux, len, t, a, b, carry);
3452 
3453     // begin<addOne>
3454     // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3455     __ addi   (mlen,      mlen,      4);
3456     __ sldi   (a,         out_len,   2);
3457     __ subi   (a,         a,         4);
3458     __ sub    (a,         a,         mlen);
3459     __ subi   (off_aux,   offset,    4);
3460     __ sub    (off_aux,   a,         off_aux);
3461 
         // Add the carry into the next output word.
3462     __ lwzx   (b,         off_aux,   out);
3463     __ add    (b,         b,         carry);
3464     __ stwx   (b,         off_aux,   out);
3465 
3466     // if (((uint64_t)s >> 32) != 0) {
         // srdi_ sets CR0 from the shifted value: zero means no overflow past
         // 32 bits, so no further carry propagation is needed.
3467     __ srdi_  (a,         b,         32);
3468     __ beq    (CCR0,      SKIP_ADDONE);
3469 
3470     // while (--mlen >= 0) {
         // Ripple the carry (+1) toward lower output offsets until a word does
         // not wrap to zero, or we run out of words/offsets.
3471     __ bind(LOOP_ADDONE);
3472     __ subi   (mlen,      mlen,      4);
3473     __ cmpwi  (CCR0,      mlen,      0);
3474     __ beq    (CCR0,      SKIP_ADDONE);
3475 
3476     // if (--offset_aux < 0) { // Carry out of number
3477     __ subi   (off_aux,   off_aux,   4);
3478     __ cmpwi  (CCR0,      off_aux,   0);
3479     __ blt    (CCR0,      SKIP_ADDONE);
3480 
3481     // } else {
3482     __ lwzx   (b,         off_aux,   out);
3483     __ addi   (b,         b,         1);
3484     __ stwx   (b,         off_aux,   out);
         // If the incremented word is non-zero the carry was absorbed; stop.
3485     __ cmpwi  (CCR0,      b,         0);
3486     __ bne    (CCR0,      SKIP_ADDONE);
3487     __ b      (LOOP_ADDONE);
3488 
3489     __ bind(SKIP_ADDONE);
3490     // } } } end<addOne>
3491 
         // Next diagonal: destination moves by 2 ints (8 bytes), i decreases.
3492     __ addi   (offset,    offset,    8);
3493     __ subi   (i_minus1,  i_minus1,  1);
3494     __ cmpwi  (CCR0,      i_minus1,  0);
3495     __ bge    (CCR0,      LOOP_DIAGONAL_SUM);
3496 
3497     __ bind(SKIP_DIAGONAL_SUM);
3498 
3499     // Shift back up and set low bit
3500     // Shifts 1 bit left up to len positions. Assumes no leading zeros
3501     // begin<primitiveLeftShift>
         // Phase 3: shift the whole out[] magnitude left one bit, pulling each
         // word's new LSB from the MSB of the following word.
3502     __ cmpwi  (CCR0,      out_len,   0);
3503     __ ble    (CCR0,      SKIP_LSHIFT);
3504     __ li     (i,         0);
3505     __ lwz    (c,         0,         out);
3506     __ subi   (b,         out_len,   1);
3507     __ mtctr  (b);
3508 
3509     __ bind(LOOP_LSHIFT);
3510     __ mr     (b,         c);
3511     __ addi   (cs,        i,         4);
3512     __ lwzx   (c,         out,       cs);
3513 
3514     __ sldi   (b,         b,         1);
3515     __ srwi   (cs,        c,         31);
3516     __ orr    (b,         b,         cs);
3517     __ stwx   (b,         i,         out);
3518 
3519     __ addi   (i,         i,         4);
3520     __ bdnz   (LOOP_LSHIFT);
3521 
         // Last word has no successor: plain shift left by one.
3522     __ sldi   (c,         out_len,   2);
3523     __ subi   (c,         c,         4);
3524     __ lwzx   (b,         out,       c);
3525     __ sldi   (b,         b,         1);
3526     __ stwx   (b,         out,       c);
3527 
3528     __ bind(SKIP_LSHIFT);
3529     // end<primitiveLeftShift>
3530 
3531     // Set low bit
         // Phase 4: out[out_len-1] |= (in[in_len-1] & 1) — the low bit of the
         // last square was dropped by the initial right shift; restore it.
3532     __ sldi   (i,         in_len,    2);
3533     __ subi   (i,         i,         4);
3534     __ lwzx   (i,         in,        i);
3535     __ sldi   (c,         out_len,   2);
3536     __ subi   (c,         c,         4);
3537     __ lwzx   (b,         out,       c);
3538 
3539     __ andi   (i,         i,         1);
3540     __ orr    (i,         b,         i);
3541 
3542     __ stwx   (i,         out,       c);
3543 
3544     // Restore non-volatile regs.
3545     current_offs = -8;
3546     __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3547     __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3548     __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3549     __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3550     __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3551     __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3552     __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3553     __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3554     __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3555     __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3556     __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3557     __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3558     __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3559     __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3560     __ ld(R14, current_offs, R1_SP);
3561 
         // Java convention: return the out array address.
3562     __ mr(ret, out);
3563     __ blr();
3564 
3565     return start;
3566   }
3567 
3568   /**
3569    * Arguments:
3570    *
3571    * Inputs:
3572    *   R3_ARG1    - int   crc
3573    *   R4_ARG2    - byte* buf
3574    *   R5_ARG3    - int   length (of buffer)
3575    *
3576    * scratch:
3577    *   R2, R6-R12
3578    *
3579    * Output:
3580    *   R3_RET     - int   crc result
3581    */
3582   // Compute CRC32 function.
3583   address generate_CRC32_updateBytes(const char* name) {
3584     __ align(CodeEntryAlignment);
3585     StubCodeMark mark(this, "StubRoutines", name);
3586     address start = __ function_entry();  // Remember stub start address (is rtn value).


3741     // support for verify_oop (must happen after universe_init)
3742     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3743 
3744     // arraycopy stubs used by compilers
3745     generate_arraycopy_stubs();
3746 
3747     // Safefetch stubs.
3748     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3749                                                        &StubRoutines::_safefetch32_fault_pc,
3750                                                        &StubRoutines::_safefetch32_continuation_pc);
3751     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3752                                                        &StubRoutines::_safefetchN_fault_pc,
3753                                                        &StubRoutines::_safefetchN_continuation_pc);
3754 
3755 #ifdef COMPILER2
3756     if (UseMultiplyToLenIntrinsic) {
3757       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3758     }
3759 #endif
3760 
3761     if (UseSquareToLenIntrinsic) {
3762       StubRoutines::_squareToLen = generate_squareToLen();
3763     }
3764     if (UseMulAddIntrinsic) {
3765       StubRoutines::_mulAdd = generate_mulAdd();
3766     }
3767     if (UseMontgomeryMultiplyIntrinsic) {
3768       StubRoutines::_montgomeryMultiply
3769         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3770     }
3771     if (UseMontgomerySquareIntrinsic) {
3772       StubRoutines::_montgomerySquare
3773         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3774     }
3775 
3776     if (UseAESIntrinsics) {
3777       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3778       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3779     }
3780 
3781   }
3782 
3783  public:
3784   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3785     // replace the standard masm with a special one:
3786     _masm = new MacroAssembler(code);
< prev index next >