< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page
rev 13393 : Enable big endian as Martin verified

http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2017-September/027027.html


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 





































































































































































































































































3309 
3310   /**
3311    * Arguments:
3312    *
3313    * Inputs:
3314    *   R3_ARG1    - int   crc
3315    *   R4_ARG2    - byte* buf
3316    *   R5_ARG3    - int   length (of buffer)
3317    *
3318    * scratch:
3319    *   R2, R6-R12
3320    *
3321    * Output:
3322    *   R3_RET     - int   crc result
3323    */
3324   // Compute CRC32 function.
3325   address generate_CRC32_updateBytes(const char* name) {
3326     __ align(CodeEntryAlignment);
3327     StubCodeMark mark(this, "StubRoutines", name);
3328     address start = __ function_entry();  // Remember stub start address (is rtn value).


3483     // support for verify_oop (must happen after universe_init)
3484     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3485 
3486     // arraycopy stubs used by compilers
3487     generate_arraycopy_stubs();
3488 
3489     // Safefetch stubs.
3490     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3491                                                        &StubRoutines::_safefetch32_fault_pc,
3492                                                        &StubRoutines::_safefetch32_continuation_pc);
3493     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494                                                        &StubRoutines::_safefetchN_fault_pc,
3495                                                        &StubRoutines::_safefetchN_continuation_pc);
3496 
3497 #ifdef COMPILER2
3498     if (UseMultiplyToLenIntrinsic) {
3499       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500     }
3501 #endif
3502 






3503     if (UseMontgomeryMultiplyIntrinsic) {
3504       StubRoutines::_montgomeryMultiply
3505         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506     }
3507     if (UseMontgomerySquareIntrinsic) {
3508       StubRoutines::_montgomerySquare
3509         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510     }
3511 
3512     if (UseAESIntrinsics) {
3513       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515     }
3516 
3517   }
3518 
3519  public:
3520   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521     // replace the standard masm with a special one:
3522     _masm = new MacroAssembler(code);


3289       const Register t0      = R2;
3290       const Register t1      = R7;
3291       const Register t2      = R8;
3292       const Register t3      = R9;
3293       const Register tc0     = R10;
3294       const Register tc1     = R11;
3295       const Register tc2     = R12;
3296 
3297       BLOCK_COMMENT("Stub body {");
3298       assert_different_registers(crc, data, dataLen, table);
3299 
3300       __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301 
3302       BLOCK_COMMENT("return");
3303       __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304       __ blr();
3305 
3306       BLOCK_COMMENT("} Stub body");
3307   }
3308 
3309   /**
3310   *  Arguments:
3311   *
3312   *  Input:
3313   *   R3_ARG1    - out address
3314   *   R4_ARG2    - in address
3315   *   R5_ARG3    - offset (32-bit word offset; zero-extended and scaled below)
3316   *   R6_ARG4    - len    (32-bit word count; zero-extended below)
3317   *   R7_ARG5    - k      (32-bit multiplier; zero-extended below)
3318   *  Output:
3319   *   R3_RET     - carry
  *
  *  Word-wise multiply-accumulate stub; the actual loop is emitted by
  *  MacroAssembler::muladd. Registered when UseMulAddIntrinsic is set
  *  (presumably backing BigInteger.mulAdd -- confirm against vmIntrinsics).
3320   */
3321   address generate_mulAdd() {
3322     __ align(CodeEntryAlignment);
3323     StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324 
3325     address start = __ function_entry();
3326 
3327     // C2 does not sign extend signed parameters to full 64 bits registers:
3328     __ rldic (R5_ARG3, R5_ARG3, 2, 32);  // always positive; word offset -> byte offset (<<2), upper 32 bits cleared
3329     __ clrldi(R6_ARG4, R6_ARG4, 32);     // force zero bits on higher word
3330     __ clrldi(R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word
3331 
3332     __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);  // R8-R10 are temps; R10 receives the final carry
3333 
3334     // Moves output carry to return register
3335     __ mr    (R3_RET,  R10);
3336 
3337     __ blr();
3338 
3339     return start;
3340   }
3341 
3342   /**
3343   *  Arguments:
3344   *
3345   *  Input:
3346   *   R3_ARG1    - in address
3347   *   R4_ARG2    - in length
3348   *   R5_ARG3    - out address
3349   *   R6_ARG4    - out length
  *
  *  Output:
  *   R3_RET     - out address (copied back from R5_ARG3 at the end)
  *
  *  Squares the multi-precision integer 'in' (array of 32-bit words) into
  *  'out' in three phases: (1) store each word's square right-shifted by one
  *  bit, (2) add the off-diagonal products via MacroAssembler::muladd with
  *  carry propagation (addOne), (3) shift the whole result left one bit and
  *  set the low bit from the last word of 'in'. Presumably backs the
  *  BigInteger.squareToLen intrinsic (see the UseSquareToLenIntrinsic
  *  registration later in this file) -- confirm against vmIntrinsics.
  *  Clobbers CTR and CCR0; non-volatile R14-R28 are saved/restored
  *  frameless at negative offsets below SP.
  *
  *  NOTE(review): R28 is saved/restored below but no temporary in this stub
  *  is declared to use it; likewise SKIP_MULADD/LOOP_MULADD are declared but
  *  never bound in this function -- possibly leftovers, verify.
3350   */
3351   address generate_squareToLen() {
3352     __ align(CodeEntryAlignment);
3353     StubCodeMark mark(this, "StubRoutines", "squareToLen");
3354 
3355     address start = __ function_entry();
3356 
3357     // args - higher word is cleaned (unsignedly) due to int to long casting
3358     const Register in        = R3_ARG1;
3359     const Register in_len    = R4_ARG2;
3360     __ clrldi(in_len, in_len, 32);
3361     const Register out       = R5_ARG3;
3362     const Register out_len   = R6_ARG4;
3363     __ clrldi(out_len, out_len, 32);
3364 
3365     // output
3366     const Register ret       = R3_RET;
3367 
3368     // temporaries
3369     const Register lplw_s    = R7;
3370     const Register in_aux    = R8;
3371     const Register out_aux   = R9;
3372     const Register piece     = R10;
3373     const Register product   = R14;
3374     const Register lplw      = R15;
3375     const Register i_minus1  = R16;
3376     const Register carry     = R17;
3377     const Register offset    = R18;
3378     const Register off_aux   = R19;
3379     const Register t         = R20;
3380     const Register mlen      = R21;
3381     const Register len       = R22;
3382     const Register a         = R23;
3383     const Register b         = R24;
3384     const Register i         = R25;
3385     const Register c         = R26;
3386     const Register cs        = R27;
3387 
3388     // Labels
3389     Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3390     Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3391 
3392     // Save non-volatile regs (frameless).
3393     int current_offs = -8;
3394     __ std(R28, current_offs, R1_SP); current_offs -= 8;
3395     __ std(R27, current_offs, R1_SP); current_offs -= 8;
3396     __ std(R26, current_offs, R1_SP); current_offs -= 8;
3397     __ std(R25, current_offs, R1_SP); current_offs -= 8;
3398     __ std(R24, current_offs, R1_SP); current_offs -= 8;
3399     __ std(R23, current_offs, R1_SP); current_offs -= 8;
3400     __ std(R22, current_offs, R1_SP); current_offs -= 8;
3401     __ std(R21, current_offs, R1_SP); current_offs -= 8;
3402     __ std(R20, current_offs, R1_SP); current_offs -= 8;
3403     __ std(R19, current_offs, R1_SP); current_offs -= 8;
3404     __ std(R18, current_offs, R1_SP); current_offs -= 8;
3405     __ std(R17, current_offs, R1_SP); current_offs -= 8;
3406     __ std(R16, current_offs, R1_SP); current_offs -= 8;
3407     __ std(R15, current_offs, R1_SP); current_offs -= 8;
3408     __ std(R14, current_offs, R1_SP);
3409 
3410     // Store the squares, right shifted one bit (i.e., divided by 2)
3411     __ subi   (out_aux,   out,       8);
3412     __ subi   (in_aux,    in,        4);
3413     __ cmpwi  (CCR0,      in_len,    0);
3414     // Initialize lplw outside of the loop
3415     __ xorr   (lplw,      lplw,      lplw);  // lplw = 0
3416     __ ble    (CCR0,      SKIP_LOOP_SQUARE);    // in_len <= 0
3417     __ mtctr  (in_len);
3418 
3419     __ bind(LOOP_SQUARE);
3420     __ lwzu   (piece,     4,         in_aux);
3421     __ mulld  (product,   piece,     piece);
3422     // shift left 63 bits and only keep the MSB
3423     __ rldic  (lplw_s,    lplw,      63, 0);
3424     __ mr     (lplw,      product);
3425     // shift right 1 bit without sign extension
3426     __ srdi   (product,   product,   1);
3427     // join them to the same register and store it
3428     __ orr    (product,   lplw_s,    product);
3429 #ifdef VM_LITTLE_ENDIAN
3430     // Swap low and high words for little endian
3431     __ rldicl (product,   product,   32, 0);
3432 #endif
3433     __ stdu   (product,   8,         out_aux);
3434     __ bdnz   (LOOP_SQUARE);
3435 
3436     __ bind(SKIP_LOOP_SQUARE);
3437 
3438     // Add in off-diagonal sums
3439     __ cmpwi  (CCR0,      in_len,    0);
3440     __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
3441     // Avoid CTR usage here in order to use it at mulAdd
3442     __ subi   (i_minus1,  in_len,    1);
3443     __ li     (offset,    4);
3444 
3445     __ bind(LOOP_DIAGONAL_SUM);
3446 
3447     __ sldi   (off_aux,   out_len,   2);
3448     __ sub    (off_aux,   off_aux,   offset);
3449 
3450     __ mr     (len,       i_minus1);
3451     __ sldi   (mlen,      i_minus1,  2);
3452     __ lwzx   (t,         in,        mlen);
3453 
3454     __ muladd (out, in, off_aux, len, t, a, b, carry);
3455 
3456     // begin<addOne>
3457     // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3458     __ addi   (mlen,      mlen,      4);
3459     __ sldi   (a,         out_len,   2);
3460     __ subi   (a,         a,         4);
3461     __ sub    (a,         a,         mlen);
3462     __ subi   (off_aux,   offset,    4);
3463     __ sub    (off_aux,   a,         off_aux);
3464 
3465     __ lwzx   (b,         off_aux,   out);
3466     __ add    (b,         b,         carry);
3467     __ stwx   (b,         off_aux,   out);
3468 
3469     // if (((uint64_t)s >> 32) != 0) {
3470     __ srdi_  (a,         b,         32);
3471     __ beq    (CCR0,      SKIP_ADDONE);
3472 
3473     // while (--mlen >= 0) {
3474     __ bind(LOOP_ADDONE);
3475     __ subi   (mlen,      mlen,      4);
3476     __ cmpwi  (CCR0,      mlen,      0);
3477     __ beq    (CCR0,      SKIP_ADDONE);
3478 
3479     // if (--offset_aux < 0) { // Carry out of number
3480     __ subi   (off_aux,   off_aux,   4);
3481     __ cmpwi  (CCR0,      off_aux,   0);
3482     __ blt    (CCR0,      SKIP_ADDONE);
3483 
3484     // } else {
3485     __ lwzx   (b,         off_aux,   out);
3486     __ addi   (b,         b,         1);
3487     __ stwx   (b,         off_aux,   out);
3488     __ cmpwi  (CCR0,      b,         0);
3489     __ bne    (CCR0,      SKIP_ADDONE);
3490     __ b      (LOOP_ADDONE);
3491 
3492     __ bind(SKIP_ADDONE);
3493     // } } } end<addOne>
3494 
3495     __ addi   (offset,    offset,    8);
3496     __ subi   (i_minus1,  i_minus1,  1);
3497     __ cmpwi  (CCR0,      i_minus1,  0);
3498     __ bge    (CCR0,      LOOP_DIAGONAL_SUM);
3499 
3500     __ bind(SKIP_DIAGONAL_SUM);
3501 
3502     // Shift back up and set low bit
3503     // Shifts 1 bit left up to len positions. Assumes no leading zeros
3504     // begin<primitiveLeftShift>
3505     __ cmpwi  (CCR0,      out_len,   0);
3506     __ ble    (CCR0,      SKIP_LSHIFT);
3507     __ li     (i,         0);
3508     __ lwz    (c,         0,         out);
3509     __ subi   (b,         out_len,   1);
3510     __ mtctr  (b);
3511 
3512     __ bind(LOOP_LSHIFT);
3513     __ mr     (b,         c);
3514     __ addi   (cs,        i,         4);
3515     __ lwzx   (c,         out,       cs);
3516 
3517     __ sldi   (b,         b,         1);
3518     __ srwi   (cs,        c,         31);
3519     __ orr    (b,         b,         cs);
3520     __ stwx   (b,         i,         out);
3521 
3522     __ addi   (i,         i,         4);
3523     __ bdnz   (LOOP_LSHIFT);
3524 
3525     __ sldi   (c,         out_len,   2);
3526     __ subi   (c,         c,         4);
3527     __ lwzx   (b,         out,       c);
3528     __ sldi   (b,         b,         1);
3529     __ stwx   (b,         out,       c);
3530 
3531     __ bind(SKIP_LSHIFT);
3532     // end<primitiveLeftShift>
3533 
3534     // Set low bit
3535     __ sldi   (i,         in_len,    2);
3536     __ subi   (i,         i,         4);
3537     __ lwzx   (i,         in,        i);
3538     __ sldi   (c,         out_len,   2);
3539     __ subi   (c,         c,         4);
3540     __ lwzx   (b,         out,       c);
3541 
3542     __ andi   (i,         i,         1);
3543     __ orr    (i,         b,         i);
3544 
3545     __ stwx   (i,         out,       c);
3546 
3547     // Restore non-volatile regs.
3548     current_offs = -8;
3549     __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3550     __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3551     __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3552     __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3553     __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3554     __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3555     __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3556     __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3557     __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3558     __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3559     __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3560     __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3561     __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3562     __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3563     __ ld(R14, current_offs, R1_SP);
3564 
3565     __ mr(ret, out);
3566     __ blr();
3567 
3568     return start;
3569   }
3570 
3571   /**
3572    * Arguments:
3573    *
3574    * Inputs:
3575    *   R3_ARG1    - int   crc
3576    *   R4_ARG2    - byte* buf
3577    *   R5_ARG3    - int   length (of buffer)
3578    *
3579    * scratch:
3580    *   R2, R6-R12
3581    *
3582    * Output:
3583    *   R3_RET     - int   crc result
3584    */
3585   // Compute CRC32 function.
3586   address generate_CRC32_updateBytes(const char* name) {
3587     __ align(CodeEntryAlignment);
3588     StubCodeMark mark(this, "StubRoutines", name);
3589     address start = __ function_entry();  // Remember stub start address (is rtn value).


3744     // support for verify_oop (must happen after universe_init)
3745     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3746 
3747     // arraycopy stubs used by compilers
3748     generate_arraycopy_stubs();
3749 
3750     // Safefetch stubs.
3751     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3752                                                        &StubRoutines::_safefetch32_fault_pc,
3753                                                        &StubRoutines::_safefetch32_continuation_pc);
3754     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3755                                                        &StubRoutines::_safefetchN_fault_pc,
3756                                                        &StubRoutines::_safefetchN_continuation_pc);
3757 
3758 #ifdef COMPILER2
3759     if (UseMultiplyToLenIntrinsic) {
3760       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3761     }
3762 #endif
3763 
3764     if (UseSquareToLenIntrinsic) {
3765       StubRoutines::_squareToLen = generate_squareToLen();
3766     }
3767     if (UseMulAddIntrinsic) {
3768       StubRoutines::_mulAdd = generate_mulAdd();
3769     }
3770     if (UseMontgomeryMultiplyIntrinsic) {
3771       StubRoutines::_montgomeryMultiply
3772         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3773     }
3774     if (UseMontgomerySquareIntrinsic) {
3775       StubRoutines::_montgomerySquare
3776         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3777     }
3778 
3779     if (UseAESIntrinsics) {
3780       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3781       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3782     }
3783 
3784   }
3785 
3786  public:
3787   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3788     // replace the standard masm with a special one:
3789     _masm = new MacroAssembler(code);
< prev index next >