3289 const Register t0 = R2;
3290 const Register t1 = R7;
3291 const Register t2 = R8;
3292 const Register t3 = R9;
3293 const Register tc0 = R10;
3294 const Register tc1 = R11;
3295 const Register tc2 = R12;
3296
3297 BLOCK_COMMENT("Stub body {");
3298 assert_different_registers(crc, data, dataLen, table);
3299
3300 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301
3302 BLOCK_COMMENT("return");
3303 __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304 __ blr();
3305
3306 BLOCK_COMMENT("} Stub body");
3307 }
3308
3309 /**
3310 * Arguments:
3311 *
3312 * Input:
3313 * R3_ARG1 - out address
3314 * R4_ARG2 - in address
3315 * R5_ARG3 - offset
3316 * R6_ARG4 - len
3317 * R7_ARG5 - k
3318 * Output:
3319 * R3_RET - carry
3320 */
3321 address generate_mulAdd() {
3322 __ align(CodeEntryAlignment);
3323 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324
3325 address start = __ function_entry();
3326
3327     // C2 does not sign-extend signed int parameters to full 64-bit registers:
3328 __ rldic (R5_ARG3, R5_ARG3, 2, 32); // always positive
3329 __ clrldi(R6_ARG4, R6_ARG4, 32); // force zero bits on higher word
3330 __ clrldi(R7_ARG5, R7_ARG5, 32); // force zero bits on higher word
3331
3332 __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3333
3334     // Move the output carry into the return register.
3335 __ mr (R3_RET, R10);
3336
3337 __ blr();
3338
3339 return start;
3340 }
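  // Illustrative reference only (an assumption-labeled sketch, not used by the generated
  // stub): the arithmetic mulAdd is expected to perform, mirroring BigInteger.mulAdd --
  // multiply 'len' 32-bit words of in[] by the scalar k, add them into out[], and return
  // the final carry (the stub returns its carry in R3_RET). Word order and the exact
  // meaning of 'offset' are simplified here; the real intrinsic works on BigInteger's
  // big-endian int[] magnitude arrays.
  static unsigned int mul_add_sketch(unsigned int* out, const unsigned int* in,
                                     int len, unsigned int k) {
    unsigned long long carry = 0;
    for (int j = 0; j < len; j++) {
      unsigned long long product = (unsigned long long)in[j] * k + out[j] + carry;
      out[j] = (unsigned int)product;  // low 32 bits stay in place
      carry  = product >> 32;          // high 32 bits propagate to the next word
    }
    return (unsigned int)carry;
  }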
3341
3342 /**
3343 * Arguments:
3344 *
3345 * Input:
3346 * R3_ARG1 - in address
3347 * R4_ARG2 - in length
3348 * R5_ARG3 - out address
3349 * R6_ARG4 - out length
3350 */
3351 address generate_squareToLen() {
3352 __ align(CodeEntryAlignment);
3353 StubCodeMark mark(this, "StubRoutines", "squareToLen");
3354
3355 address start = __ function_entry();
3356
3357     // Incoming int args: the upper 32 bits are cleared (the int-to-long cast is unsigned here).
3358 const Register in = R3_ARG1;
3359 const Register in_len = R4_ARG2;
3360 __ clrldi(in_len, in_len, 32);
3361 const Register out = R5_ARG3;
3362 const Register out_len = R6_ARG4;
3363 __ clrldi(out_len, out_len, 32);
3364
3365 // output
3366 const Register ret = R3_RET;
3367
3368 // temporaries
3369 const Register lplw_s = R7;
3370 const Register in_aux = R8;
3371 const Register out_aux = R9;
3372 const Register piece = R10;
3373 const Register product = R14;
3374 const Register lplw = R15;
3375 const Register i_minus1 = R16;
3376 const Register carry = R17;
3377 const Register offset = R18;
3378 const Register off_aux = R19;
3379 const Register t = R20;
3380 const Register mlen = R21;
3381 const Register len = R22;
3382 const Register a = R23;
3383 const Register b = R24;
3384 const Register i = R25;
3385 const Register c = R26;
3386 const Register cs = R27;
3387
3388 // Labels
3389 Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3390 Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3391
3392 // Save non-volatile regs (frameless).
3393 int current_offs = -8;
3394 __ std(R28, current_offs, R1_SP); current_offs -= 8;
3395 __ std(R27, current_offs, R1_SP); current_offs -= 8;
3396 __ std(R26, current_offs, R1_SP); current_offs -= 8;
3397 __ std(R25, current_offs, R1_SP); current_offs -= 8;
3398 __ std(R24, current_offs, R1_SP); current_offs -= 8;
3399 __ std(R23, current_offs, R1_SP); current_offs -= 8;
3400 __ std(R22, current_offs, R1_SP); current_offs -= 8;
3401 __ std(R21, current_offs, R1_SP); current_offs -= 8;
3402 __ std(R20, current_offs, R1_SP); current_offs -= 8;
3403 __ std(R19, current_offs, R1_SP); current_offs -= 8;
3404 __ std(R18, current_offs, R1_SP); current_offs -= 8;
3405 __ std(R17, current_offs, R1_SP); current_offs -= 8;
3406 __ std(R16, current_offs, R1_SP); current_offs -= 8;
3407 __ std(R15, current_offs, R1_SP); current_offs -= 8;
3408 __ std(R14, current_offs, R1_SP);
3409
3410 // Store the squares, right shifted one bit (i.e., divided by 2)
3411 __ subi (out_aux, out, 8);
3412 __ subi (in_aux, in, 4);
3413 __ cmpwi (CCR0, in_len, 0);
3414 // Initialize lplw outside of the loop
3415 __ xorr (lplw, lplw, lplw);
3416 __ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
3417 __ mtctr (in_len);
3418
3419 __ bind(LOOP_SQUARE);
3420 __ lwzu (piece, 4, in_aux);
3421 __ mulld (product, piece, piece);
3422 // shift left 63 bits and only keep the MSB
3423 __ rldic (lplw_s, lplw, 63, 0);
3424 __ mr (lplw, product);
3425 // shift right 1 bit without sign extension
3426 __ srdi (product, product, 1);
3427 // join them to the same register and store it
3428 __ orr (product, lplw_s, product);
3429 #ifdef VM_LITTLE_ENDIAN
3430 // Swap low and high words for little endian
3431 __ rldicl (product, product, 32, 0);
3432 #endif
3433 __ stdu (product, 8, out_aux);
3434 __ bdnz (LOOP_SQUARE);
3435
3436 __ bind(SKIP_LOOP_SQUARE);
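    // At this point out[] holds the 64-bit squares of all input words, concatenated and
    // shifted right by one bit overall; the 1-bit left shift near the end of this stub
    // (primitiveLeftShift) restores the alignment, and the final step re-sets the low bit.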
3437
3438 // Add in off-diagonal sums
3439 __ cmpwi (CCR0, in_len, 0);
3440 __ ble (CCR0, SKIP_DIAGONAL_SUM);
3441     // Avoid using the CTR register here so it remains free for mulAdd below
3442 __ subi (i_minus1, in_len, 1);
3443 __ li (offset, 4);
3444
3445 __ bind(LOOP_DIAGONAL_SUM);
3446
3447 __ sldi (off_aux, out_len, 2);
3448 __ sub (off_aux, off_aux, offset);
3449
3450 __ mr (len, i_minus1);
3451 __ sldi (mlen, i_minus1, 2);
3452 __ lwzx (t, in, mlen);
3453
3454 __ muladd (out, in, off_aux, len, t, a, b, carry);
3455
3456 // begin<addOne>
3457 // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3458 __ addi (mlen, mlen, 4);
3459 __ sldi (a, out_len, 2);
3460 __ subi (a, a, 4);
3461 __ sub (a, a, mlen);
3462 __ subi (off_aux, offset, 4);
3463 __ sub (off_aux, a, off_aux);
3464
3465 __ lwzx (b, off_aux, out);
3466 __ add (b, b, carry);
3467 __ stwx (b, off_aux, out);
3468
3469 // if (((uint64_t)s >> 32) != 0) {
3470 __ srdi_ (a, b, 32);
3471 __ beq (CCR0, SKIP_ADDONE);
3472
3473 // while (--mlen >= 0) {
3474 __ bind(LOOP_ADDONE);
3475 __ subi (mlen, mlen, 4);
3476 __ cmpwi (CCR0, mlen, 0);
3477 __ beq (CCR0, SKIP_ADDONE);
3478
3479     // if (--off_aux < 0) { // Carry out of number
3480 __ subi (off_aux, off_aux, 4);
3481 __ cmpwi (CCR0, off_aux, 0);
3482 __ blt (CCR0, SKIP_ADDONE);
3483
3484 // } else {
3485 __ lwzx (b, off_aux, out);
3486 __ addi (b, b, 1);
3487 __ stwx (b, off_aux, out);
3488 __ cmpwi (CCR0, b, 0);
3489 __ bne (CCR0, SKIP_ADDONE);
3490 __ b (LOOP_ADDONE);
3491
3492 __ bind(SKIP_ADDONE);
3493 // } } } end<addOne>
3494
3495 __ addi (offset, offset, 8);
3496 __ subi (i_minus1, i_minus1, 1);
3497 __ cmpwi (CCR0, i_minus1, 0);
3498 __ bge (CCR0, LOOP_DIAGONAL_SUM);
3499
3500 __ bind(SKIP_DIAGONAL_SUM);
3501
3502 // Shift back up and set low bit
3503 // Shifts 1 bit left up to len positions. Assumes no leading zeros
3504 // begin<primitiveLeftShift>
3505 __ cmpwi (CCR0, out_len, 0);
3506 __ ble (CCR0, SKIP_LSHIFT);
3507 __ li (i, 0);
3508 __ lwz (c, 0, out);
3509 __ subi (b, out_len, 1);
3510 __ mtctr (b);
3511
3512 __ bind(LOOP_LSHIFT);
3513 __ mr (b, c);
3514 __ addi (cs, i, 4);
3515 __ lwzx (c, out, cs);
3516
3517 __ sldi (b, b, 1);
3518 __ srwi (cs, c, 31);
3519 __ orr (b, b, cs);
3520 __ stwx (b, i, out);
3521
3522 __ addi (i, i, 4);
3523 __ bdnz (LOOP_LSHIFT);
3524
3525 __ sldi (c, out_len, 2);
3526 __ subi (c, c, 4);
3527 __ lwzx (b, out, c);
3528 __ sldi (b, b, 1);
3529 __ stwx (b, out, c);
3530
3531 __ bind(SKIP_LSHIFT);
3532 // end<primitiveLeftShift>
3533
3534 // Set low bit
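    // (x*x has the same least significant bit as x, so the bit lost by storing the
    // squares right-shifted is simply the LSB of the last input word.)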
3535 __ sldi (i, in_len, 2);
3536 __ subi (i, i, 4);
3537 __ lwzx (i, in, i);
3538 __ sldi (c, out_len, 2);
3539 __ subi (c, c, 4);
3540 __ lwzx (b, out, c);
3541
3542 __ andi (i, i, 1);
3543 __ orr (i, b, i);
3544
3545 __ stwx (i, out, c);
3546
3547 // Restore non-volatile regs.
3548 current_offs = -8;
3549 __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3550 __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3551 __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3552 __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3553 __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3554 __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3555 __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3556 __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3557 __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3558 __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3559 __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3560 __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3561 __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3562 __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3563 __ ld(R14, current_offs, R1_SP);
3564
3565 __ mr(ret, out);
3566 __ blr();
3567
3568 return start;
3569 }
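  // Illustrative reference only (assumed semantics, not the stub's algorithm): the result
  // squareToLen must produce is simply out = in * in over 2*in_len 32-bit words. The sketch
  // below computes that with plain schoolbook multiplication over little-endian words,
  // whereas the stub (like BigInteger.implSquareToLen) works on big-endian int[] data in
  // three phases: store the squares right-shifted one bit, add the off-diagonal products
  // via mulAdd/addOne, then shift left one bit and restore the low bit.
  static void square_to_len_sketch(const unsigned int* in, int in_len, unsigned int* out) {
    for (int i = 0; i < 2 * in_len; i++) out[i] = 0;
    for (int i = 0; i < in_len; i++) {
      unsigned long long carry = 0;
      for (int j = 0; j < in_len; j++) {
        unsigned long long t = (unsigned long long)in[i] * in[j] + out[i + j] + carry;
        out[i + j] = (unsigned int)t;  // low 32 bits
        carry = t >> 32;               // propagate the rest
      }
      out[i + in_len] = (unsigned int)carry;
    }
  }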
3570
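  // Illustrative reference only (an assumption-labeled sketch): a plain byte-at-a-time
  // version of the CRC32 the updateBytes stub below is expected to compute, using the
  // java.util.zip.CRC32 convention (reflected polynomial 0xEDB88320 with the state
  // inverted on entry and exit -- the inversion the invertCRC parameter of
  // kernel_crc32_1word is assumed to control). The stub itself uses the much faster
  // table-driven, word-wise kernel.
  static unsigned int crc32_byte_sketch(unsigned int crc, const unsigned char* buf, int len) {
    crc = ~crc;
    for (int i = 0; i < len; i++) {
      crc ^= buf[i];
      for (int b = 0; b < 8; b++) {
        crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));  // conditionally fold in the polynomial
      }
    }
    return ~crc;
  }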
3571 /**
3572 * Arguments:
3573 *
3574 * Inputs:
3575 * R3_ARG1 - int crc
3576 * R4_ARG2 - byte* buf
3577 * R5_ARG3 - int length (of buffer)
3578 *
3579 * scratch:
3580 * R2, R6-R12
3581 *
3582  * Output:
3583 * R3_RET - int crc result
3584 */
3585 // Compute CRC32 function.
3586 address generate_CRC32_updateBytes(const char* name) {
3587 __ align(CodeEntryAlignment);
3588 StubCodeMark mark(this, "StubRoutines", name);
3589 address start = __ function_entry(); // Remember stub start address (is rtn value).
3744 // support for verify_oop (must happen after universe_init)
3745 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3746
3747 // arraycopy stubs used by compilers
3748 generate_arraycopy_stubs();
3749
3750 // Safefetch stubs.
3751 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3752 &StubRoutines::_safefetch32_fault_pc,
3753 &StubRoutines::_safefetch32_continuation_pc);
3754 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3755 &StubRoutines::_safefetchN_fault_pc,
3756 &StubRoutines::_safefetchN_continuation_pc);
3757
3758 #ifdef COMPILER2
3759 if (UseMultiplyToLenIntrinsic) {
3760 StubRoutines::_multiplyToLen = generate_multiplyToLen();
3761 }
3762 #endif
3763
3764 if (UseSquareToLenIntrinsic) {
3765 StubRoutines::_squareToLen = generate_squareToLen();
3766 }
3767 if (UseMulAddIntrinsic) {
3768 StubRoutines::_mulAdd = generate_mulAdd();
3769 }
3770 if (UseMontgomeryMultiplyIntrinsic) {
3771 StubRoutines::_montgomeryMultiply
3772 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3773 }
3774 if (UseMontgomerySquareIntrinsic) {
3775 StubRoutines::_montgomerySquare
3776 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3777 }
3778
3779 if (UseAESIntrinsics) {
3780 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3781 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3782 }
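    // The registrations above are each gated by a VM flag (UseMulAddIntrinsic,
    // UseSquareToLenIntrinsic, UseMontgomeryMultiplyIntrinsic, UseAESIntrinsics, ...).
    // Whether a flag is on by default depends on the platform and on CPU feature
    // detection done elsewhere; as an example, an explicit opt-in on the command line
    // would look like: java -XX:+UseSquareToLenIntrinsic -XX:+UseMulAddIntrinsic ...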
3783
3784 }
3785
3786 public:
3787 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3788 // replace the standard masm with a special one:
3789 _masm = new MacroAssembler(code);