    const Register t0  = R2;
    const Register t1  = R7;
    const Register t2  = R8;
    const Register t3  = R9;
    const Register tc0 = R10;
    const Register tc1 = R11;
    const Register tc2 = R12;

    BLOCK_COMMENT("Stub body {");
    assert_different_registers(crc, data, dataLen, table);

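    // kernel_crc32_1word consumes the buffer one 32-bit word per iteration,
    // using the lookup table and the temp registers set up above; invertCRC
    // selects whether crc is complemented on entry and exit (the usual
    // zlib/java.util.zip convention). Descriptive note only; the kernel
    // itself lives in the MacroAssembler.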
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);

    BLOCK_COMMENT("return");
    __ mr_if_needed(R3_RET, crc);  // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
    __ blr();

    BLOCK_COMMENT("} Stub body");
  }

  /**
   * Arguments:
   *
   * Input:
   *   R3_ARG1    - out address
   *   R4_ARG2    - in address
   *   R5_ARG3    - offset
   *   R6_ARG4    - len
   *   R7_ARG5    - k
   * Output:
   *   R3_RET     - carry
   */
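  // For orientation: the semantics this stub is expected to provide match
  // java.math.BigInteger::implMulAdd (a rough sketch under that assumption;
  // the exact index bookkeeping lives in MacroAssembler::muladd):
  //
  //   // Add in[len-1..0] * k into out starting at out[offset], one 32-bit
  //   // word at a time, walking towards the more significant words; the
  //   // final carry out of the top word is the return value.
  //   unsigned long long carry = 0;
  //   for (int j = len - 1; j >= 0; j--) {
  //     carry += (unsigned long long)in[j] * k + out[offset];
  //     out[offset--] = (unsigned int)carry;  // low 32 bits
  //     carry >>= 32;                         // propagate high 32 bits
  //   }
  //   return (unsigned int)carry;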
  address generate_mulAdd() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "mulAdd");

    address start = __ function_entry();

    // C2 does not sign-extend signed parameters to full 64-bit registers:
    __ rldic  (R5_ARG3, R5_ARG3, 2, 32);  // zero-extend and convert the int offset to a byte offset (always positive)
    __ clrldi (R7_ARG5, R7_ARG5, 32);     // force zero bits on higher word

    __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);

    // Move the output carry to the return register.
    __ mr    (R3_RET,  R10);

    __ blr();

    return start;
  }

  /**
   * Arguments:
   *
   * Input:
   *   R3_ARG1    - in address
   *   R4_ARG2    - in length
   *   R5_ARG3    - out address
   *   R6_ARG4    - out length
   *
   * Output:
   *   R3_RET     - out address
   */
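  // Overview (a sketch; the phase names come from the labels below, and the
  // overall scheme mirrors what java.math.BigInteger::implSquareToLen does):
  //   1. LOOP_SQUARE:       square every 32-bit input word into a 64-bit
  //                         product and store it, keeping the whole result
  //                         shifted right by one bit.
  //   2. LOOP_DIAGONAL_SUM: add each off-diagonal product in[i]*in[j] (i < j)
  //                         once via muladd, rippling carries with addOne.
  //   3. LOOP_LSHIFT:       shift the result left by one bit, which restores
  //                         the squares and doubles the off-diagonal terms.
  //   4. Set the low bit, which equals the low bit of the last input word.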
  address generate_squareToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "squareToLen");

    address start = __ function_entry();

    // args
    const Register in        = R3_ARG1;
    const Register in_len    = R4_ARG2;
    const Register out       = R5_ARG3;
    const Register out_len   = R6_ARG4;

    // output
    const Register ret       = R3_RET;

    // temporaries
    const Register lplw_s    = R7;
    const Register in_aux    = R8;
    const Register out_aux   = R9;
    const Register piece     = R10;
    const Register product   = R14;
    const Register product_s = R15;
    const Register lplw      = R16;
    const Register i_minus1  = R17;
    const Register carry     = R18;
    const Register offset    = R19;
    const Register off_aux   = R20;
    const Register t         = R21;
    const Register mlen      = R22;
    const Register len       = R23;
    const Register a         = R24;
    const Register b         = R25;
    const Register i         = R26;
    const Register c         = R27;
    const Register cs        = R28;

    // Labels
    Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
    Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;

    // Save non-volatile regs (frameless).
    int current_offs = -8;
    __ std(R28, current_offs, R1_SP); current_offs -= 8;
    __ std(R27, current_offs, R1_SP); current_offs -= 8;
    __ std(R26, current_offs, R1_SP); current_offs -= 8;
    __ std(R25, current_offs, R1_SP); current_offs -= 8;
    __ std(R24, current_offs, R1_SP); current_offs -= 8;
    __ std(R23, current_offs, R1_SP); current_offs -= 8;
    __ std(R22, current_offs, R1_SP); current_offs -= 8;
    __ std(R21, current_offs, R1_SP); current_offs -= 8;
    __ std(R20, current_offs, R1_SP); current_offs -= 8;
    __ std(R19, current_offs, R1_SP); current_offs -= 8;
    __ std(R18, current_offs, R1_SP); current_offs -= 8;
    __ std(R17, current_offs, R1_SP); current_offs -= 8;
    __ std(R16, current_offs, R1_SP); current_offs -= 8;
    __ std(R15, current_offs, R1_SP); current_offs -= 8;
    __ std(R14, current_offs, R1_SP);

    // Store the squares, right shifted one bit (i.e., divided by 2).
    __ subi   (out_aux,   out,       8);
    __ subi   (in_aux,    in,        4);
    __ cmpwi  (CCR0,      in_len,    0);
    // Initialize lplw outside of the loop.
    __ xorr   (lplw,      lplw,      lplw);
    __ ble    (CCR0,      SKIP_LOOP_SQUARE);  // in_len <= 0
    __ mtctr  (in_len);

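    // Each square fills exactly one output doubleword. The products are
    // stored shifted right by one bit; lplw carries the previous square so
    // that the bit shifted out of it lands at the top of the next doubleword.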
    __ bind(LOOP_SQUARE);
    __ lwzu   (piece,     4,         in_aux);
    __ mulld  (product,   piece,     piece);
    // Rotate left 31 bits and keep only the resulting MSB (the original low bit).
    __ rlwinm (lplw_s,    lplw,      31, 0, 0);
    // Shift right 33 bits without sign extension.
    __ srdi   (product_s, product,   33);
    __ orr    (product_s, lplw_s,    product_s);
    __ mr     (lplw,      product);
    __ rldicl (product,   product,   31, 31);
    __ orr    (product,   product,   product_s);
    __ stdu   (product,   8,         out_aux);
    __ bdnz   (LOOP_SQUARE);

    __ bind(SKIP_LOOP_SQUARE);

    // Add in off-diagonal sums.
    __ cmpwi  (CCR0,      in_len,    0);
    __ ble    (CCR0,      SKIP_DIAGONAL_SUM);
    // Avoid CTR usage here in order to use it at mulAdd.
    __ subi   (i_minus1,  in_len,    1);
    __ li     (offset,    4);

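    // Each pass loads one input word (t = in[i_minus1]) and lets muladd
    // multiply it into the words before it, accumulating at out + off_aux;
    // the addOne sequence below then ripples the final carry through the
    // more significant output words.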
    __ bind(LOOP_DIAGONAL_SUM);

    __ sldi   (off_aux,   out_len,   2);
    __ sub    (off_aux,   off_aux,   offset);

    __ mr     (len,       i_minus1);
    __ sldi   (mlen,      i_minus1,  2);
    __ lwzx   (t,         in,        mlen);

    __ muladd (out, in, off_aux, len, t, a, b, carry);

    // begin<addOne>
    // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
    __ addi   (mlen,      mlen,      4);
    __ sldi   (a,         out_len,   2);
    __ subi   (a,         a,         4);
    __ sub    (a,         a,         mlen);
    __ subi   (off_aux,   offset,    4);
    __ sub    (off_aux,   a,         off_aux);

    __ lwzx   (b,         off_aux,   out);
    __ add    (b,         b,         carry);
    __ stwx   (b,         off_aux,   out);

    // if (((uint64_t)s >> 32) != 0) {
    __ srdi_  (a,         b,         32);
    __ beq    (CCR0,      SKIP_ADDONE);

    // while (--mlen >= 0) {
    __ bind(LOOP_ADDONE);
    __ subi   (mlen,      mlen,      4);
    __ cmpwi  (CCR0,      mlen,      0);
    __ beq    (CCR0,      SKIP_ADDONE);

    // if (--offset_aux < 0) { // Carry out of number
    __ subi   (off_aux,   off_aux,   4);
    __ cmpwi  (CCR0,      off_aux,   0);
    __ blt    (CCR0,      SKIP_ADDONE);

    // } else {
    __ lwzx   (b,         off_aux,   out);
    __ addi   (b,         b,         1);
    __ stwx   (b,         off_aux,   out);
    __ cmpwi  (CCR0,      b,         0);
    __ bne    (CCR0,      SKIP_ADDONE);
    __ b      (LOOP_ADDONE);

    __ bind(SKIP_ADDONE);
    // } } } end<addOne>

    __ addi   (offset,    offset,    8);
    __ subi   (i_minus1,  i_minus1,  1);
    __ cmpwi  (CCR0,      i_minus1,  0);
    __ bge    (CCR0,      LOOP_DIAGONAL_SUM);

    __ bind(SKIP_DIAGONAL_SUM);

    // Shift back up and set low bit.
    // Shifts 1 bit left up to len positions. Assumes no leading zeros.
    // begin<primitiveLeftShift>
    __ cmpwi  (CCR0,      out_len,   0);
    __ ble    (CCR0,      SKIP_LSHIFT);
    __ li     (i,         0);
    __ lwz    (c,         0,         out);
    __ subi   (b,         out_len,   1);
    __ mtctr  (b);

    __ bind(LOOP_LSHIFT);
    __ mr     (b,         c);
    __ addi   (cs,        i,         4);
    __ lwzx   (c,         out,       cs);

    __ sldi   (b,         b,         1);
    __ srwi   (cs,        c,         31);
    __ orr    (b,         b,         cs);
    __ stwx   (b,         i,         out);

    __ addi   (i,         i,         4);
    __ bdnz   (LOOP_LSHIFT);

    __ sldi   (c,         out_len,   2);
    __ subi   (c,         c,         4);
    __ lwzx   (b,         out,       c);
    __ sldi   (b,         b,         1);
    __ stwx   (b,         out,       c);

    __ bind(SKIP_LSHIFT);
    // end<primitiveLeftShift>

    // Set low bit.
    __ sldi   (i,         in_len,    2);
    __ subi   (i,         i,         4);
    __ lwzx   (i,         in,        i);
    __ sldi   (c,         out_len,   2);
    __ subi   (c,         c,         4);
    __ lwzx   (b,         out,       c);

    __ andi   (i,         i,         1);
    __ orr    (i,         b,         i);

    __ stwx   (i,         out,       c);

    // Restore non-volatile regs.
    current_offs = -8;
    __ ld(R28, current_offs, R1_SP); current_offs -= 8;
    __ ld(R27, current_offs, R1_SP); current_offs -= 8;
    __ ld(R26, current_offs, R1_SP); current_offs -= 8;
    __ ld(R25, current_offs, R1_SP); current_offs -= 8;
    __ ld(R24, current_offs, R1_SP); current_offs -= 8;
    __ ld(R23, current_offs, R1_SP); current_offs -= 8;
    __ ld(R22, current_offs, R1_SP); current_offs -= 8;
    __ ld(R21, current_offs, R1_SP); current_offs -= 8;
    __ ld(R20, current_offs, R1_SP); current_offs -= 8;
    __ ld(R19, current_offs, R1_SP); current_offs -= 8;
    __ ld(R18, current_offs, R1_SP); current_offs -= 8;
    __ ld(R17, current_offs, R1_SP); current_offs -= 8;
    __ ld(R16, current_offs, R1_SP); current_offs -= 8;
    __ ld(R15, current_offs, R1_SP); current_offs -= 8;
    __ ld(R14, current_offs, R1_SP);

    __ mr(ret, out);
    __ blr();

    return start;
  }


  /**
   * Arguments:
   *
   * Inputs:
   *   R3_ARG1    - int   crc
   *   R4_ARG2    - byte* buf
   *   R5_ARG3    - int   length (of buffer)
   *
   * Scratch:
   *   R2, R6-R12
   *
   * Output:
   *   R3_RET     - int   crc result
   */
  // Compute CRC32 function.
  address generate_CRC32_updateBytes(const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ function_entry();  // Remember stub start address (is rtn value).
    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);

#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
#endif

    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }
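    // Montgomery multiply/square have no generated stub here; the entries
    // point directly at the C++ implementations in SharedRuntime.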
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }

    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
    }

  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);