3289 const Register t0 = R2;
3290 const Register t1 = R7;
3291 const Register t2 = R8;
3292 const Register t3 = R9;
3293 const Register tc0 = R10;
3294 const Register tc1 = R11;
3295 const Register tc2 = R12;
3296
3297 BLOCK_COMMENT("Stub body {");
3298 assert_different_registers(crc, data, dataLen, table);
3299
3300 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301
3302 BLOCK_COMMENT("return");
3303 __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304 __ blr();
3305
3306 BLOCK_COMMENT("} Stub body");
3307 }
3308
3309
3310 /**
3311 * Arguments:
3312 *
3313 * Inputs:
3314 * R3_ARG1 - int crc
3315 * R4_ARG2 - byte* buf
3316 * R5_ARG3 - int length (of buffer)
3317 *
3318 * scratch:
3319 * R2, R6-R12
3320 *
3321  * Output:
3322 * R3_RET - int crc result
3323 */
3324 // Compute CRC32 function.
3325 address generate_CRC32_updateBytes(const char* name) {
3326 __ align(CodeEntryAlignment);
3327 StubCodeMark mark(this, "StubRoutines", name);
3328 address start = __ function_entry(); // Remember stub start address (is rtn value).
3483 // support for verify_oop (must happen after universe_init)
3484 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3485
3486 // arraycopy stubs used by compilers
3487 generate_arraycopy_stubs();
3488
3489 // Safefetch stubs.
3490 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3491 &StubRoutines::_safefetch32_fault_pc,
3492 &StubRoutines::_safefetch32_continuation_pc);
3493 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494 &StubRoutines::_safefetchN_fault_pc,
3495 &StubRoutines::_safefetchN_continuation_pc);
3496
3497 #ifdef COMPILER2
3498 if (UseMultiplyToLenIntrinsic) {
3499 StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500 }
3501 #endif
3502
3503 if (UseMontgomeryMultiplyIntrinsic) {
3504 StubRoutines::_montgomeryMultiply
3505 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506 }
3507 if (UseMontgomerySquareIntrinsic) {
3508 StubRoutines::_montgomerySquare
3509 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510 }
3511
3512 if (UseAESIntrinsics) {
3513 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515 }
3516
3517 }
3518
3519 public:
3520 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521 // replace the standard masm with a special one:
3522 _masm = new MacroAssembler(code);
|
3289 const Register t0 = R2;
3290 const Register t1 = R7;
3291 const Register t2 = R8;
3292 const Register t3 = R9;
3293 const Register tc0 = R10;
3294 const Register tc1 = R11;
3295 const Register tc2 = R12;
3296
3297 BLOCK_COMMENT("Stub body {");
3298 assert_different_registers(crc, data, dataLen, table);
3299
3300 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301
3302 BLOCK_COMMENT("return");
3303 __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304 __ blr();
3305
3306 BLOCK_COMMENT("} Stub body");
3307 }
3308
3309 /**
3310 * Arguments:
3311 *
3312 * Input:
3313 * R3_ARG1 - out address
3314 * R4_ARG2 - in address
3315 * R5_ARG3 - offset
3316 * R6_ARG4 - len
3317 * R7_ARG5 - k
3318 * Output:
3319 * R3_RET - carry
3320 */
3321   address generate_mulAdd() {
    // Emits the stub for the java.math.BigInteger::mulAdd intrinsic:
    // out[offset..] += in[0..len-1] * k, returning the final carry.
    // Register contract is documented in the block comment above
    // (R3_ARG1=out, R4_ARG2=in, R5_ARG3=offset, R6_ARG4=len, R7_ARG5=k).
3322     __ align(CodeEntryAlignment);
3323     StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324
3325     address start = __ function_entry();
3326
3327     // C2 does not sign extend signed parameters to full 64 bits registers:
3328     __ rldic (R5_ARG3, R5_ARG3, 2, 32);  // scale int index to byte offset (<<2) and clear high word; always positive
3329     __ clrldi(R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word (zero-extend k)
3330
3331     // R8-R10 are scratch; per the move below, muladd leaves the carry in R10.
3332     __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3333
3334     // Moves output carry to return register
3335     __ mr    (R3_RET,  R10);
3336
3337     __ blr();
3338
3339     return start;  // entry address of the generated stub
3340   }
3340
3341 /**
3342 * Arguments:
3343 *
3344 * Input:
3345 * R3_ARG1 - in address
3346 * R4_ARG2 - in length
3347 * R5_ARG3 - out address
3348 * R6_ARG4 - out length
3349 */
3350   address generate_squareToLen() {
    // Emits the stub for the java.math.BigInteger::squareToLen intrinsic.
    // Mirrors the Java schoolbook squaring: (1) store the halved squares of
    // each input int on the diagonal, (2) add in the doubled off-diagonal
    // products (muladd + addOne carry propagation), (3) shift the whole
    // result left one bit, (4) OR in the low bit of the square of the
    // lowest input word. Returns the out address in R3_RET.
3351     __ align(CodeEntryAlignment);
3352     StubCodeMark mark(this, "StubRoutines", "squareToLen");
3353
3354     address start = __ function_entry();
3355
3356     // args
3357     const Register in        = R3_ARG1;
3358     const Register in_len    = R4_ARG2;
3359     const Register out       = R5_ARG3;
3360     const Register out_len   = R6_ARG4;
3361
3362     // output
3363     const Register ret       = R3_RET;
3364
3365     // temporaries (R14-R27 are non-volatile and saved/restored below)
3366     const Register lplw_s    = R7;
3367     const Register in_aux    = R8;
3368     const Register out_aux   = R9;
3369     const Register piece     = R10;
3370     const Register product   = R14;
3371     const Register lplw      = R15;
3372     const Register i_minus1  = R16;
3373     const Register carry     = R17;
3374     const Register offset    = R18;
3375     const Register off_aux   = R19;
3376     const Register t         = R20;
3377     const Register mlen      = R21;
3378     const Register len       = R22;
3379     const Register a         = R23;
3380     const Register b         = R24;
3381     const Register i         = R25;
3382     const Register c         = R26;
3383     const Register cs        = R27;
3384
3385     // Labels
3386     Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
3387     Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
3388
3389     // Save non-volatile regs (frameless).
    // NOTE(review): stores land at negative offsets below R1_SP without
    // pushing a frame — presumably relies on the ABI-protected area below
    // the stack pointer; confirm against the PPC64 ABI in use.
    // NOTE(review): R28 is saved/restored although it is not one of the
    // declared temporaries above.
3390     int current_offs = -8;
3391     __ std(R28, current_offs, R1_SP); current_offs -= 8;
3392     __ std(R27, current_offs, R1_SP); current_offs -= 8;
3393     __ std(R26, current_offs, R1_SP); current_offs -= 8;
3394     __ std(R25, current_offs, R1_SP); current_offs -= 8;
3395     __ std(R24, current_offs, R1_SP); current_offs -= 8;
3396     __ std(R23, current_offs, R1_SP); current_offs -= 8;
3397     __ std(R22, current_offs, R1_SP); current_offs -= 8;
3398     __ std(R21, current_offs, R1_SP); current_offs -= 8;
3399     __ std(R20, current_offs, R1_SP); current_offs -= 8;
3400     __ std(R19, current_offs, R1_SP); current_offs -= 8;
3401     __ std(R18, current_offs, R1_SP); current_offs -= 8;
3402     __ std(R17, current_offs, R1_SP); current_offs -= 8;
3403     __ std(R16, current_offs, R1_SP); current_offs -= 8;
3404     __ std(R15, current_offs, R1_SP); current_offs -= 8;
3405     __ std(R14, current_offs, R1_SP);
3406
3407     // Store the squares, right shifted one bit (i.e., divided by 2)
    // Pre-decremented base pointers so lwzu/stdu can update-and-access.
3408     __ subi   (out_aux,   out,       8);
3409     __ subi   (in_aux,    in,        4);
3410     __ cmpwi  (CCR0,      in_len,    0);
3411     // Initialize lplw outside of the loop
3412     __ xorr   (lplw,      lplw,      lplw);  // lplw = 0
3413     __ ble    (CCR0,      SKIP_LOOP_SQUARE); // in_len <= 0
3414     __ mtctr  (in_len);
3415
3416     __ bind(LOOP_SQUARE);
3417     __ lwzu   (piece,     4,         in_aux);   // load next 32-bit input word
3418     __ mulld  (product,   piece,     piece);    // 64-bit square of the word
3419     // shift left 63 bits and only keep the MSB
3420     __ rldic  (lplw_s,    lplw,      63, 0);    // carry bit from previous square's low word
3421     __ mr     (lplw,      product);
3422     // shift right 1 bit without sign extension
3423     __ srdi   (product,   product,   1);
3424     // join them to the same register and store it as Little Endian
3425     __ orr    (product,   lplw_s,    product);
3426     __ rldicl (product,   product,   32, 0);    // rotate by 32: swap the two 32-bit halves
3427     __ stdu   (product,   8,         out_aux);
3428     __ bdnz   (LOOP_SQUARE);
3429
3430     __ bind(SKIP_LOOP_SQUARE);
3431
3432     // Add in off-diagonal sums
3433     __ cmpwi  (CCR0,      in_len,    0);
3434     __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
3435     // Avoid CTR usage here in order to use it at mulAdd
3436     __ subi   (i_minus1,  in_len,    1);
3437     __ li     (offset,    4);
3438
3439     __ bind(LOOP_DIAGONAL_SUM);
3440
    // off_aux = out_len*4 - offset: byte offset of the muladd destination.
3441     __ sldi   (off_aux,   out_len,   2);
3442     __ sub    (off_aux,   off_aux,   offset);
3443
3444     __ mr     (len,       i_minus1);
3445     __ sldi   (mlen,      i_minus1,  2);
3446     __ lwzx   (t,         in,        mlen);     // t = in[i_minus1], the multiplier for this row
3447
3448     __ muladd (out, in, off_aux, len, t, a, b, carry);
3449
3450     // begin<addOne>
3451     // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
3452     __ addi   (mlen,      mlen,      4);
3453     __ sldi   (a,         out_len,   2);
3454     __ subi   (a,         a,         4);
3455     __ sub    (a,         a,         mlen);
3456     __ subi   (off_aux,   offset,    4);
3457     __ sub    (off_aux,   a,         off_aux);
3458
    // Add the muladd carry into out[off_aux] and propagate further if needed.
3459     __ lwzx   (b,         off_aux,   out);
3460     __ add    (b,         b,         carry);
3461     __ stwx   (b,         off_aux,   out);
3462
3463     // if (((uint64_t)s >> 32) != 0) {
3464     __ srdi_  (a,         b,         32);
3465     __ beq    (CCR0,      SKIP_ADDONE);
3466
3467     // while (--mlen >= 0) {
3468     __ bind(LOOP_ADDONE);
3469     __ subi  (mlen,       mlen,      4);
3470     __ cmpwi (CCR0,       mlen,      0);
3471     __ beq   (CCR0,       SKIP_ADDONE);
3472
3473     // if (--offset_aux < 0) { // Carry out of number
3474     __ subi  (off_aux,    off_aux,   4);
3475     __ cmpwi (CCR0,       off_aux,   0);
3476     __ blt   (CCR0,       SKIP_ADDONE);
3477
3478     // } else {
3479     __ lwzx  (b,          off_aux,   out);
3480     __ addi  (b,          b,         1);
3481     __ stwx  (b,          off_aux,   out);
3482     __ cmpwi (CCR0,       b,         0);    // stop once an increment does not wrap to 0
3483     __ bne   (CCR0,       SKIP_ADDONE);
3484     __ b     (LOOP_ADDONE);
3485
3486     __ bind(SKIP_ADDONE);
3487     // } } } end<addOne>
3488
3489     __ addi  (offset,     offset,    8);    // next row starts two ints further in
3490     __ subi  (i_minus1,   i_minus1,  1);
3491     __ cmpwi (CCR0,       i_minus1,  0);
3492     __ bge   (CCR0,       LOOP_DIAGONAL_SUM);
3493
3494     __ bind(SKIP_DIAGONAL_SUM);
3495
3496     // Shift back up and set low bit
3497     // Shifts 1 bit left up to len positions. Assumes no leading zeros
3498     // begin<primitiveLeftShift>
3499     __ cmpwi (CCR0, out_len, 0);
3500     __ ble   (CCR0, SKIP_LSHIFT);
3501     __ li    (i, 0);
3502     __ lwz   (c, 0, out);
3503     __ subi  (b, out_len, 1);    // loop over out_len-1 words; the last word is handled after the loop
3504     __ mtctr (b);
3505
3506     __ bind(LOOP_LSHIFT);
3507     __ mr    (b, c);
3508     __ addi  (cs, i, 4);
3509     __ lwzx  (c, out, cs);       // peek at the next word for its carried-in top bit
3510
3511     __ sldi  (b, b, 1);
3512     __ srwi  (cs, c, 31);        // top bit of the next word becomes this word's low bit
3513     __ orr   (b, b, cs);
3514     __ stwx  (b, i, out);
3515
3516     __ addi  (i, i, 4);
3517     __ bdnz  (LOOP_LSHIFT);
3518
    // Shift the last word (no word after it to borrow a bit from).
3519     __ sldi  (c, out_len, 2);
3520     __ subi  (c, c, 4);
3521     __ lwzx  (b, out, c);
3522     __ sldi  (b, b, 1);
3523     __ stwx  (b, out, c);
3524
3525     __ bind(SKIP_LSHIFT);
3526     // end<primitiveLeftShift>
3527
3528     // Set low bit
    // out[out_len-1] |= in[in_len-1] & 1 (low bit of the squared low word).
3529     __ sldi  (i, in_len, 2);
3530     __ subi  (i, i, 4);
3531     __ lwzx  (i, in, i);
3532     __ sldi  (c, out_len, 2);
3533     __ subi  (c, c, 4);
3534     __ lwzx  (b, out, c);
3535
3536     __ andi  (i, i, 1);
3537     __ orr   (i, b, i);
3538
3539     __ stwx  (i, out, c);
3540
3541     // Restore non-volatile regs (same frameless layout as the saves above).
3542     current_offs = -8;
3543     __ ld(R28, current_offs, R1_SP); current_offs -= 8;
3544     __ ld(R27, current_offs, R1_SP); current_offs -= 8;
3545     __ ld(R26, current_offs, R1_SP); current_offs -= 8;
3546     __ ld(R25, current_offs, R1_SP); current_offs -= 8;
3547     __ ld(R24, current_offs, R1_SP); current_offs -= 8;
3548     __ ld(R23, current_offs, R1_SP); current_offs -= 8;
3549     __ ld(R22, current_offs, R1_SP); current_offs -= 8;
3550     __ ld(R21, current_offs, R1_SP); current_offs -= 8;
3551     __ ld(R20, current_offs, R1_SP); current_offs -= 8;
3552     __ ld(R19, current_offs, R1_SP); current_offs -= 8;
3553     __ ld(R18, current_offs, R1_SP); current_offs -= 8;
3554     __ ld(R17, current_offs, R1_SP); current_offs -= 8;
3555     __ ld(R16, current_offs, R1_SP); current_offs -= 8;
3556     __ ld(R15, current_offs, R1_SP); current_offs -= 8;
3557     __ ld(R14, current_offs, R1_SP);
3558
3559     __ mr(ret, out);    // function result is the out address
3560     __ blr();
3561
3562     return start;  // entry address of the generated stub
3563   }
3564
3565 /**
3566 * Arguments:
3567 *
3568 * Inputs:
3569 * R3_ARG1 - int crc
3570 * R4_ARG2 - byte* buf
3571 * R5_ARG3 - int length (of buffer)
3572 *
3573 * scratch:
3574 * R2, R6-R12
3575 *
3576  * Output:
3577 * R3_RET - int crc result
3578 */
3579 // Compute CRC32 function.
3580 address generate_CRC32_updateBytes(const char* name) {
3581 __ align(CodeEntryAlignment);
3582 StubCodeMark mark(this, "StubRoutines", name);
3583 address start = __ function_entry(); // Remember stub start address (is rtn value).
3738 // support for verify_oop (must happen after universe_init)
3739 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3740
3741 // arraycopy stubs used by compilers
3742 generate_arraycopy_stubs();
3743
3744 // Safefetch stubs.
3745 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3746 &StubRoutines::_safefetch32_fault_pc,
3747 &StubRoutines::_safefetch32_continuation_pc);
3748 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3749 &StubRoutines::_safefetchN_fault_pc,
3750 &StubRoutines::_safefetchN_continuation_pc);
3751
3752 #ifdef COMPILER2
3753 if (UseMultiplyToLenIntrinsic) {
3754 StubRoutines::_multiplyToLen = generate_multiplyToLen();
3755 }
3756 #endif
3757
3758 if (UseSquareToLenIntrinsic) {
3759 StubRoutines::_squareToLen = generate_squareToLen();
3760 }
3761 if (UseMulAddIntrinsic) {
3762 StubRoutines::_mulAdd = generate_mulAdd();
3763 }
3764 if (UseMontgomeryMultiplyIntrinsic) {
3765 StubRoutines::_montgomeryMultiply
3766 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3767 }
3768 if (UseMontgomerySquareIntrinsic) {
3769 StubRoutines::_montgomerySquare
3770 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3771 }
3772
3773 if (UseAESIntrinsics) {
3774 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3775 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3776 }
3777
3778 }
3779
3780 public:
3781 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3782 // replace the standard masm with a special one:
3783 _masm = new MacroAssembler(code);
|