3289 const Register t0 = R2;
3290 const Register t1 = R7;
3291 const Register t2 = R8;
3292 const Register t3 = R9;
3293 const Register tc0 = R10;
3294 const Register tc1 = R11;
3295 const Register tc2 = R12;
3296
3297 BLOCK_COMMENT("Stub body {");
3298 assert_different_registers(crc, data, dataLen, table);
3299
3300 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301
3302 BLOCK_COMMENT("return");
3303 __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304 __ blr();
3305
3306 BLOCK_COMMENT("} Stub body");
3307 }
3308
3309
3310 /**
3311 * Arguments:
3312 *
3313 * Inputs:
3314 * R3_ARG1 - int crc
3315 * R4_ARG2 - byte* buf
3316 * R5_ARG3 - int length (of buffer)
3317 *
3318 * scratch:
3319 * R2, R6-R12
3320 *
3321  * Output:
3322 * R3_RET - int crc result
3323 */
3324 // Compute CRC32 function.
3325 address generate_CRC32_updateBytes(const char* name) {
3326 __ align(CodeEntryAlignment);
3327 StubCodeMark mark(this, "StubRoutines", name);
3328 address start = __ function_entry(); // Remember stub start address (is rtn value).
3483 // support for verify_oop (must happen after universe_init)
3484 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3485
3486 // arraycopy stubs used by compilers
3487 generate_arraycopy_stubs();
3488
3489 // Safefetch stubs.
3490 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3491 &StubRoutines::_safefetch32_fault_pc,
3492 &StubRoutines::_safefetch32_continuation_pc);
3493 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3494 &StubRoutines::_safefetchN_fault_pc,
3495 &StubRoutines::_safefetchN_continuation_pc);
3496
3497 #ifdef COMPILER2
3498 if (UseMultiplyToLenIntrinsic) {
3499 StubRoutines::_multiplyToLen = generate_multiplyToLen();
3500 }
3501 #endif
3502
3503 if (UseMontgomeryMultiplyIntrinsic) {
3504 StubRoutines::_montgomeryMultiply
3505 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3506 }
3507 if (UseMontgomerySquareIntrinsic) {
3508 StubRoutines::_montgomerySquare
3509 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3510 }
3511
3512 if (UseAESIntrinsics) {
3513 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3514 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3515 }
3516
3517 }
3518
3519 public:
3520 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3521 // replace the standard masm with a special one:
3522 _masm = new MacroAssembler(code);
|
3289 const Register t0 = R2;
3290 const Register t1 = R7;
3291 const Register t2 = R8;
3292 const Register t3 = R9;
3293 const Register tc0 = R10;
3294 const Register tc1 = R11;
3295 const Register tc2 = R12;
3296
3297 BLOCK_COMMENT("Stub body {");
3298 assert_different_registers(crc, data, dataLen, table);
3299
3300 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
3301
3302 BLOCK_COMMENT("return");
3303 __ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
3304 __ blr();
3305
3306 BLOCK_COMMENT("} Stub body");
3307 }
3308
3309 /**
3310 * Arguments:
3311 *
3312 * Input:
3313 * R3_ARG1 - out address
3314 * R4_ARG2 - in address
3315 * R5_ARG3 - offset
3316 * R6_ARG4 - len
3317 * R7_ARG5 - k
3318 * Output:
3319 * R3_RET - carry
3320 */
3321 address generate_mulAdd() {
3322 __ align(CodeEntryAlignment);
3323 StubCodeMark mark(this, "StubRoutines", "mulAdd");
3324
3325 address start = __ function_entry();
3326
3327 // C2 does not sign extend signed parameters to full 64 bits registers:
3328 __ rldic (R5_ARG3, R5_ARG3, 2, 32); // always positive
3329 __ clrldi(R6_ARG4, R6_ARG4, 32); // force zero bits on higher word
3330 __ clrldi(R7_ARG5, R7_ARG5, 32); // force zero bits on higher word
3331
3332 __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
3333
3334 // Moves output carry to return register
3335 __ mr (R3_RET, R10);
3336
3337 __ blr();
3338
3339 return start;
3340 }
3341
3342 /**
3343 * Arguments:
3344 *
3345 * Input:
3346 * R3_ARG1 - in address
3347 * R4_ARG2 - in length
3348 * R5_ARG3 - out address
3349 * R6_ARG4 - out length
3350 */
  // Intrinsic stub installed as StubRoutines::_squareToLen
  // (BigInteger::implSquareToLen). Squares the big-endian int magnitude
  // in[0..in_len) into out[0..out_len) in three phases (see the block
  // comments below):
  //   1. store the squares of each input word, right shifted one bit,
  //   2. add in the off-diagonal product sums via muladd,
  //   3. shift the whole result left one bit and restore the low bit
  //      from the input.
  // Returns the out address in R3_RET.
  address generate_squareToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "squareToLen");

    address start = __ function_entry();

    // args - higher word is cleaned (unsignedly) due to int to long casting
    const Register in      = R3_ARG1;
    const Register in_len  = R4_ARG2;
    __ clrldi(in_len, in_len, 32);
    const Register out     = R5_ARG3;
    const Register out_len = R6_ARG4;
    __ clrldi(out_len, out_len, 32);

    // output
    const Register ret = R3_RET;

    // temporaries
    const Register lplw_s   = R7;   // previous low product word, shifted
    const Register in_aux   = R8;   // running pointer into in[]
    const Register out_aux  = R9;   // running pointer into out[]
    const Register piece    = R10;  // current input word
    const Register product  = R14;  // 64-bit square of piece
    const Register lplw     = R15;  // low product word carried across iterations
    const Register i_minus1 = R16;  // descending row index for diagonal sums
    const Register carry    = R17;  // carry out of muladd
    const Register offset   = R18;  // byte offset into out[] for current row
    const Register off_aux  = R19;
    const Register t        = R20;  // multiplier word for the current row
    const Register mlen     = R21;
    const Register len      = R22;
    const Register a        = R23;
    const Register b        = R24;
    const Register i        = R25;
    const Register c        = R26;
    const Register cs       = R27;

    // Labels
    Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
    Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;

    // Save non-volatile regs (frameless).
    // Stored at negative offsets below SP; no stack frame is pushed.
    int current_offs = -8;
    __ std(R28, current_offs, R1_SP); current_offs -= 8;
    __ std(R27, current_offs, R1_SP); current_offs -= 8;
    __ std(R26, current_offs, R1_SP); current_offs -= 8;
    __ std(R25, current_offs, R1_SP); current_offs -= 8;
    __ std(R24, current_offs, R1_SP); current_offs -= 8;
    __ std(R23, current_offs, R1_SP); current_offs -= 8;
    __ std(R22, current_offs, R1_SP); current_offs -= 8;
    __ std(R21, current_offs, R1_SP); current_offs -= 8;
    __ std(R20, current_offs, R1_SP); current_offs -= 8;
    __ std(R19, current_offs, R1_SP); current_offs -= 8;
    __ std(R18, current_offs, R1_SP); current_offs -= 8;
    __ std(R17, current_offs, R1_SP); current_offs -= 8;
    __ std(R16, current_offs, R1_SP); current_offs -= 8;
    __ std(R15, current_offs, R1_SP); current_offs -= 8;
    __ std(R14, current_offs, R1_SP);

    // Phase 1: Store the squares, right shifted one bit (i.e., divided by 2).
    // Pointers are pre-decremented so lwzu/stdu can use update-form addressing.
    __ subi   (out_aux, out, 8);
    __ subi   (in_aux,  in, 4);
    __ cmpwi  (CCR0, in_len, 0);
    // Initialize lplw outside of the loop
    __ xorr   (lplw, lplw, lplw);
    __ ble    (CCR0, SKIP_LOOP_SQUARE);    // in_len <= 0
    __ mtctr  (in_len);

    __ bind(LOOP_SQUARE);
    __ lwzu   (piece, 4, in_aux);
    __ mulld  (product, piece, piece);
    // shift left 63 bits and only keep the MSB
    __ rldic  (lplw_s, lplw, 63, 0);
    __ mr     (lplw, product);
    // shift right 1 bit without sign extension
    __ srdi   (product, product, 1);
    // join them to the same register and store it as Little Endian
    __ orr    (product, lplw_s, product);
    __ rldicl (product, product, 32, 0);
    __ stdu   (product, 8, out_aux);
    __ bdnz   (LOOP_SQUARE);

    __ bind(SKIP_LOOP_SQUARE);

    // Phase 2: Add in off-diagonal sums.
    // One muladd row per input word, walking i_minus1 down to 0.
    __ cmpwi  (CCR0, in_len, 0);
    __ ble    (CCR0, SKIP_DIAGONAL_SUM);
    // Avoid CTR usage here in order to use it at mulAdd
    __ subi   (i_minus1, in_len, 1);
    __ li     (offset, 4);

    __ bind(LOOP_DIAGONAL_SUM);

    // off_aux = out_len*4 - offset: byte offset of this row's tail in out[].
    __ sldi   (off_aux, out_len, 2);
    __ sub    (off_aux, off_aux, offset);

    __ mr     (len, i_minus1);
    __ sldi   (mlen, i_minus1, 2);
    __ lwzx   (t, in, mlen);

    __ muladd (out, in, off_aux, len, t, a, b, carry);

    // begin<addOne>
    // Propagate the muladd carry into the word above the row just processed.
    // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
    __ addi   (mlen, mlen, 4);
    __ sldi   (a, out_len, 2);
    __ subi   (a, a, 4);
    __ sub    (a, a, mlen);
    __ subi   (off_aux, offset, 4);
    __ sub    (off_aux, a, off_aux);

    __ lwzx   (b, off_aux, out);
    __ add    (b, b, carry);
    __ stwx   (b, off_aux, out);

    // if (((uint64_t)s >> 32) != 0) {
    __ srdi_  (a, b, 32);
    __ beq    (CCR0, SKIP_ADDONE);

    // while (--mlen >= 0) {
    __ bind(LOOP_ADDONE);
    __ subi   (mlen, mlen, 4);
    __ cmpwi  (CCR0, mlen, 0);
    __ beq    (CCR0, SKIP_ADDONE);

    // if (--offset_aux < 0) { // Carry out of number
    __ subi   (off_aux, off_aux, 4);
    __ cmpwi  (CCR0, off_aux, 0);
    __ blt    (CCR0, SKIP_ADDONE);

    // } else {
    // Ripple the +1 up through out[] until a word does not wrap to zero.
    __ lwzx   (b, off_aux, out);
    __ addi   (b, b, 1);
    __ stwx   (b, off_aux, out);
    __ cmpwi  (CCR0, b, 0);
    __ bne    (CCR0, SKIP_ADDONE);
    __ b      (LOOP_ADDONE);

    __ bind(SKIP_ADDONE);
    // } } } end<addOne>

    __ addi   (offset, offset, 8);
    __ subi   (i_minus1, i_minus1, 1);
    __ cmpwi  (CCR0, i_minus1, 0);
    __ bge    (CCR0, LOOP_DIAGONAL_SUM);

    __ bind(SKIP_DIAGONAL_SUM);

    // Phase 3: Shift back up and set low bit.
    // Shifts 1 bit left up to len positions. Assumes no leading zeros.
    // begin<primitiveLeftShift>
    __ cmpwi  (CCR0, out_len, 0);
    __ ble    (CCR0, SKIP_LSHIFT);
    __ li     (i, 0);
    __ lwz    (c, 0, out);
    __ subi   (b, out_len, 1);
    __ mtctr  (b);

    __ bind(LOOP_LSHIFT);
    __ mr     (b, c);
    __ addi   (cs, i, 4);
    __ lwzx   (c, out, cs);          // next word, supplies the bit shifted in

    __ sldi   (b, b, 1);
    __ srwi   (cs, c, 31);           // MSB of next word becomes our LSB
    __ orr    (b, b, cs);
    __ stwx   (b, i, out);

    __ addi   (i, i, 4);
    __ bdnz   (LOOP_LSHIFT);

    // Last word has no successor; just shift it left by one.
    __ sldi   (c, out_len, 2);
    __ subi   (c, c, 4);
    __ lwzx   (b, out, c);
    __ sldi   (b, b, 1);
    __ stwx   (b, out, c);

    __ bind(SKIP_LSHIFT);
    // end<primitiveLeftShift>

    // Set low bit: copy the lowest bit of the last input word into the
    // lowest bit of the last output word (lost in the phase-1 right shift).
    __ sldi   (i, in_len, 2);
    __ subi   (i, i, 4);
    __ lwzx   (i, in, i);
    __ sldi   (c, out_len, 2);
    __ subi   (c, c, 4);
    __ lwzx   (b, out, c);

    __ andi   (i, i, 1);
    __ orr    (i, b, i);

    __ stwx   (i, out, c);

    // Restore non-volatile regs.
    current_offs = -8;
    __ ld(R28, current_offs, R1_SP); current_offs -= 8;
    __ ld(R27, current_offs, R1_SP); current_offs -= 8;
    __ ld(R26, current_offs, R1_SP); current_offs -= 8;
    __ ld(R25, current_offs, R1_SP); current_offs -= 8;
    __ ld(R24, current_offs, R1_SP); current_offs -= 8;
    __ ld(R23, current_offs, R1_SP); current_offs -= 8;
    __ ld(R22, current_offs, R1_SP); current_offs -= 8;
    __ ld(R21, current_offs, R1_SP); current_offs -= 8;
    __ ld(R20, current_offs, R1_SP); current_offs -= 8;
    __ ld(R19, current_offs, R1_SP); current_offs -= 8;
    __ ld(R18, current_offs, R1_SP); current_offs -= 8;
    __ ld(R17, current_offs, R1_SP); current_offs -= 8;
    __ ld(R16, current_offs, R1_SP); current_offs -= 8;
    __ ld(R15, current_offs, R1_SP); current_offs -= 8;
    __ ld(R14, current_offs, R1_SP);

    __ mr(ret, out);
    __ blr();

    return start;
  }
3567
3568 /**
3569 * Arguments:
3570 *
3571 * Inputs:
3572 * R3_ARG1 - int crc
3573 * R4_ARG2 - byte* buf
3574 * R5_ARG3 - int length (of buffer)
3575 *
3576 * scratch:
3577 * R2, R6-R12
3578 *
3580  * Output:
3580 * R3_RET - int crc result
3581 */
3582 // Compute CRC32 function.
3583 address generate_CRC32_updateBytes(const char* name) {
3584 __ align(CodeEntryAlignment);
3585 StubCodeMark mark(this, "StubRoutines", name);
3586 address start = __ function_entry(); // Remember stub start address (is rtn value).
3741 // support for verify_oop (must happen after universe_init)
3742 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
3743
3744 // arraycopy stubs used by compilers
3745 generate_arraycopy_stubs();
3746
3747 // Safefetch stubs.
3748 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
3749 &StubRoutines::_safefetch32_fault_pc,
3750 &StubRoutines::_safefetch32_continuation_pc);
3751 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3752 &StubRoutines::_safefetchN_fault_pc,
3753 &StubRoutines::_safefetchN_continuation_pc);
3754
3755 #ifdef COMPILER2
3756 if (UseMultiplyToLenIntrinsic) {
3757 StubRoutines::_multiplyToLen = generate_multiplyToLen();
3758 }
3759 #endif
3760
3761 if (UseSquareToLenIntrinsic) {
3762 StubRoutines::_squareToLen = generate_squareToLen();
3763 }
3764 if (UseMulAddIntrinsic) {
3765 StubRoutines::_mulAdd = generate_mulAdd();
3766 }
3767 if (UseMontgomeryMultiplyIntrinsic) {
3768 StubRoutines::_montgomeryMultiply
3769 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3770 }
3771 if (UseMontgomerySquareIntrinsic) {
3772 StubRoutines::_montgomerySquare
3773 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3774 }
3775
3776 if (UseAESIntrinsics) {
3777 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3778 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3779 }
3780
3781 }
3782
3783 public:
3784 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3785 // replace the standard masm with a special one:
3786 _masm = new MacroAssembler(code);
|