< prev index next >

src/cpu/ppc/vm/vm_version_ppc.cpp

Print this page
rev 13439 : Martin's 8185979: PPC64: Implement SHA2 intrinsic


  96 
  97 #ifdef COMPILER2
  98   if (!UseSIGTRAP) {
  99     MSG(TrapBasedRangeChecks);
 100     FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
 101   }
 102 
 103   // On Power6 test for section size.
 104   if (PowerArchitecturePPC64 == 6) {
 105     determine_section_size();
 106   // TODO: PPC port } else {
 107   // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
 108   }
 109 
 110   MaxVectorSize = 8;
 111 #endif
 112 
 113   // Create and print feature-string.
 114   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
 115   jio_snprintf(buf, sizeof(buf),
 116                "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 117                (has_fsqrt()   ? " fsqrt"   : ""),
 118                (has_isel()    ? " isel"    : ""),
 119                (has_lxarxeh() ? " lxarxeh" : ""),
 120                (has_cmpb()    ? " cmpb"    : ""),
 121                //(has_mftgpr()? " mftgpr"  : ""),
 122                (has_popcntb() ? " popcntb" : ""),
 123                (has_popcntw() ? " popcntw" : ""),
 124                (has_fcfids()  ? " fcfids"  : ""),
 125                (has_vand()    ? " vand"    : ""),
 126                (has_lqarx()   ? " lqarx"   : ""),
 127                (has_vcipher() ? " aes"     : ""),
 128                (has_vpmsumb() ? " vpmsumb" : ""),
 129                (has_tcheck()  ? " tcheck"  : ""),
 130                (has_mfdscr()  ? " mfdscr"  : ""),
 131                (has_vsx()     ? " vsx"     : ""),
 132                (has_ldbrx()   ? " ldbrx"   : ""),
 133                (has_stdbrx()  ? " stdbrx"  : "")

 134                // Make sure number of %s matches num_features!
 135               );
 136   _features_string = os::strdup(buf);
 137   if (Verbose) {
 138     print_features();
 139   }
 140 
 141   // PPC64 supports 8-byte compare-exchange operations (see
 142   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
 143   // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
 144   _supports_cx8 = true;
 145 
 146   // Used by C1.
 147   _supports_atomic_getset4 = true;
 148   _supports_atomic_getadd4 = true;
 149   _supports_atomic_getset8 = true;
 150   _supports_atomic_getadd8 = true;
 151 
 152   UseSSE = 0; // Only on x86 and x64
 153 


 230     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 231       warning("AES intrinsics are not available on this CPU");
 232     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 233   }
 234 #endif
 235 
 236   if (UseAESCTRIntrinsics) {
 237     warning("AES/CTR intrinsics are not available on this CPU");
 238     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
 239   }
 240 
 241   if (UseGHASHIntrinsics) {
 242     warning("GHASH intrinsics are not available on this CPU");
 243     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
 244   }
 245 
 246   if (FLAG_IS_DEFAULT(UseFMA)) {
 247     FLAG_SET_DEFAULT(UseFMA, true);
 248   }
 249 
 250   if (UseSHA) {





 251     warning("SHA instructions are not available on this CPU");
 252     FLAG_SET_DEFAULT(UseSHA, false);
 253   }
 254   if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
 255     warning("SHA intrinsics are not available on this CPU");

 256     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);








 257     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);








 258     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
 259   }
 260 




 261   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
 262     UseMultiplyToLenIntrinsic = true;
 263   }
 264   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
 265     UseMontgomeryMultiplyIntrinsic = true;
 266   }
 267   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
 268     UseMontgomerySquareIntrinsic = true;
 269   }
 270 
 271   if (UseVectorizedMismatchIntrinsic) {
 272     warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
 273     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
 274   }
 275 
 276 
 277   // Adjust RTM (Restricted Transactional Memory) flags.
 278   if (UseRTMLocking) {
 279     // If CPU or OS are too old:
 280     // Can't continue because UseRTMLocking affects UseBiasedLocking flag


 640   // Keep R3_ARG1 unmodified, it contains &field (see below).
 641   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
 642   a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
 643   a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
 644   a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
 645   a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
 646   a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
 647   a->popcntb(R7, R5);                          // code[5]  -> popcntb
 648   a->popcntw(R7, R5);                          // code[6]  -> popcntw
 649   a->fcfids(F3, F4);                           // code[7]  -> fcfids
 650   a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
 651   // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
 652   a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
 653   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
 654   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
 655   a->tcheck(0);                                // code[12] -> tcheck
 656   a->mfdscr(R0);                               // code[13] -> mfdscr
 657   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
 658   a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
 659   a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx

 660   a->blr();
 661 
 662   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
 663   void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
 664   a->dcbz(R3_ARG1); // R3_ARG1 = addr
 665   a->blr();
 666 
 667   uint32_t *code_end = (uint32_t *)a->pc();
 668   a->flush();
 669   _features = VM_Version::unknown_m;
 670 
 671   // Print the detection code.
 672   if (PrintAssembly) {
 673     ttyLocker ttyl;
 674     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
 675     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 676   }
 677 
 678   // Measure cache line size.
 679   memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.


 691 
 692   // Determine which instructions are legal.
 693   int feature_cntr = 0;
 694   if (code[feature_cntr++]) features |= fsqrt_m;
 695   if (code[feature_cntr++]) features |= fsqrts_m;
 696   if (code[feature_cntr++]) features |= isel_m;
 697   if (code[feature_cntr++]) features |= lxarxeh_m;
 698   if (code[feature_cntr++]) features |= cmpb_m;
 699   if (code[feature_cntr++]) features |= popcntb_m;
 700   if (code[feature_cntr++]) features |= popcntw_m;
 701   if (code[feature_cntr++]) features |= fcfids_m;
 702   if (code[feature_cntr++]) features |= vand_m;
 703   if (code[feature_cntr++]) features |= lqarx_m;
 704   if (code[feature_cntr++]) features |= vcipher_m;
 705   if (code[feature_cntr++]) features |= vpmsumb_m;
 706   if (code[feature_cntr++]) features |= tcheck_m;
 707   if (code[feature_cntr++]) features |= mfdscr_m;
 708   if (code[feature_cntr++]) features |= vsx_m;
 709   if (code[feature_cntr++]) features |= ldbrx_m;
 710   if (code[feature_cntr++]) features |= stdbrx_m;

 711 
 712   // Print the detection code.
 713   if (PrintAssembly) {
 714     ttyLocker ttyl;
 715     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
 716     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 717   }
 718 
 719   _features = features;
 720 }
 721 
 722 // Power 8: Configure Data Stream Control Register.
 723 void VM_Version::config_dscr() {
 724   // 7 InstWords for each call (function descriptor + blr instruction).
 725   const int code_size = (2+2*7)*BytesPerInstWord;
 726 
 727   // Allocate space for the code.
 728   ResourceMark rm;
 729   CodeBuffer cb("config_dscr", code_size, 0);
 730   MacroAssembler* a = new MacroAssembler(&cb);




  96 
  97 #ifdef COMPILER2
  98   if (!UseSIGTRAP) {
  99     MSG(TrapBasedRangeChecks);
 100     FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
 101   }
 102 
 103   // On Power6 test for section size.
 104   if (PowerArchitecturePPC64 == 6) {
 105     determine_section_size();
 106   // TODO: PPC port } else {
 107   // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
 108   }
 109 
 110   MaxVectorSize = 8;
 111 #endif
 112 
 113   // Create and print feature-string.
 114   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
 115   jio_snprintf(buf, sizeof(buf),
 116                "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 117                (has_fsqrt()   ? " fsqrt"   : ""),
 118                (has_isel()    ? " isel"    : ""),
 119                (has_lxarxeh() ? " lxarxeh" : ""),
 120                (has_cmpb()    ? " cmpb"    : ""),
 121                //(has_mftgpr()? " mftgpr"  : ""),
 122                (has_popcntb() ? " popcntb" : ""),
 123                (has_popcntw() ? " popcntw" : ""),
 124                (has_fcfids()  ? " fcfids"  : ""),
 125                (has_vand()    ? " vand"    : ""),
 126                (has_lqarx()   ? " lqarx"   : ""),
 127                (has_vcipher() ? " aes"     : ""),
 128                (has_vpmsumb() ? " vpmsumb" : ""),
 129                (has_tcheck()  ? " tcheck"  : ""),
 130                (has_mfdscr()  ? " mfdscr"  : ""),
 131                (has_vsx()     ? " vsx"     : ""),
 132                (has_ldbrx()   ? " ldbrx"   : ""),
 133                (has_stdbrx()  ? " stdbrx"  : ""),
 134                (has_vshasig() ? " sha"     : "")
 135                // Make sure number of %s matches num_features!
 136               );
 137   _features_string = os::strdup(buf);
 138   if (Verbose) {
 139     print_features();
 140   }
 141 
 142   // PPC64 supports 8-byte compare-exchange operations (see
 143   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
 144   // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
 145   _supports_cx8 = true;
 146 
 147   // Used by C1.
 148   _supports_atomic_getset4 = true;
 149   _supports_atomic_getadd4 = true;
 150   _supports_atomic_getset8 = true;
 151   _supports_atomic_getadd8 = true;
 152 
 153   UseSSE = 0; // Only on x86 and x64
 154 


 231     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 232       warning("AES intrinsics are not available on this CPU");
 233     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 234   }
 235 #endif
 236 
 237   if (UseAESCTRIntrinsics) {
 238     warning("AES/CTR intrinsics are not available on this CPU");
 239     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
 240   }
 241 
 242   if (UseGHASHIntrinsics) {
 243     warning("GHASH intrinsics are not available on this CPU");
 244     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
 245   }
 246 
 247   if (FLAG_IS_DEFAULT(UseFMA)) {
 248     FLAG_SET_DEFAULT(UseFMA, true);
 249   }
 250 
 251   if (has_vshasig()) {
 252     if (FLAG_IS_DEFAULT(UseSHA)) {
 253       UseSHA = true;
 254     }
 255   } else if (UseSHA) {
 256     if (!FLAG_IS_DEFAULT(UseSHA))
 257       warning("SHA instructions are not available on this CPU");
 258     FLAG_SET_DEFAULT(UseSHA, false);
 259   }
 260 
 261   if (UseSHA1Intrinsics) {
 262     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
 263     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
 264   }
 265 
 266   if (UseSHA && has_vshasig()) {
 267     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
 268       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
 269     }
 270   } else if (UseSHA256Intrinsics) {
 271     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
 272     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
 273   }
 274 
 275   if (UseSHA && has_vshasig()) {
 276     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
 277       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
 278     }
 279   } else if (UseSHA512Intrinsics) {
 280     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
 281     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
 282   }
 283 
 284   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
 285     FLAG_SET_DEFAULT(UseSHA, false);
 286   }
 287 
 288   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
 289     UseMultiplyToLenIntrinsic = true;
 290   }
 291   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
 292     UseMontgomeryMultiplyIntrinsic = true;
 293   }
 294   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
 295     UseMontgomerySquareIntrinsic = true;
 296   }
 297 
 298   if (UseVectorizedMismatchIntrinsic) {
 299     warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
 300     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
 301   }
 302 
 303 
 304   // Adjust RTM (Restricted Transactional Memory) flags.
 305   if (UseRTMLocking) {
 306     // If CPU or OS are too old:
 307     // Can't continue because UseRTMLocking affects UseBiasedLocking flag


 667   // Keep R3_ARG1 unmodified, it contains &field (see below).
 668   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
 669   a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
 670   a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
 671   a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
 672   a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
 673   a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
 674   a->popcntb(R7, R5);                          // code[5]  -> popcntb
 675   a->popcntw(R7, R5);                          // code[6]  -> popcntw
 676   a->fcfids(F3, F4);                           // code[7]  -> fcfids
 677   a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
 678   // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
 679   a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
 680   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
 681   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
 682   a->tcheck(0);                                // code[12] -> tcheck
 683   a->mfdscr(R0);                               // code[13] -> mfdscr
 684   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
 685   a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
 686   a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
 687   a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[17] -> vshasig
 688   a->blr();
 689 
 690   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
 691   void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
 692   a->dcbz(R3_ARG1); // R3_ARG1 = addr
 693   a->blr();
 694 
 695   uint32_t *code_end = (uint32_t *)a->pc();
 696   a->flush();
 697   _features = VM_Version::unknown_m;
 698 
 699   // Print the detection code.
 700   if (PrintAssembly) {
 701     ttyLocker ttyl;
 702     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
 703     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 704   }
 705 
 706   // Measure cache line size.
 707   memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.


 719 
 720   // Determine which instructions are legal.
 721   int feature_cntr = 0;
 722   if (code[feature_cntr++]) features |= fsqrt_m;
 723   if (code[feature_cntr++]) features |= fsqrts_m;
 724   if (code[feature_cntr++]) features |= isel_m;
 725   if (code[feature_cntr++]) features |= lxarxeh_m;
 726   if (code[feature_cntr++]) features |= cmpb_m;
 727   if (code[feature_cntr++]) features |= popcntb_m;
 728   if (code[feature_cntr++]) features |= popcntw_m;
 729   if (code[feature_cntr++]) features |= fcfids_m;
 730   if (code[feature_cntr++]) features |= vand_m;
 731   if (code[feature_cntr++]) features |= lqarx_m;
 732   if (code[feature_cntr++]) features |= vcipher_m;
 733   if (code[feature_cntr++]) features |= vpmsumb_m;
 734   if (code[feature_cntr++]) features |= tcheck_m;
 735   if (code[feature_cntr++]) features |= mfdscr_m;
 736   if (code[feature_cntr++]) features |= vsx_m;
 737   if (code[feature_cntr++]) features |= ldbrx_m;
 738   if (code[feature_cntr++]) features |= stdbrx_m;
 739   if (code[feature_cntr++]) features |= vshasig_m;
 740 
 741   // Print the detection code.
 742   if (PrintAssembly) {
 743     ttyLocker ttyl;
 744     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
 745     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 746   }
 747 
 748   _features = features;
 749 }
 750 
 751 // Power 8: Configure Data Stream Control Register.
 752 void VM_Version::config_dscr() {
 753   // 7 InstWords for each call (function descriptor + blr instruction).
 754   const int code_size = (2+2*7)*BytesPerInstWord;
 755 
 756   // Allocate space for the code.
 757   ResourceMark rm;
 758   CodeBuffer cb("config_dscr", code_size, 0);
 759   MacroAssembler* a = new MacroAssembler(&cb);


< prev index next >