< prev index next >

src/cpu/ppc/vm/vm_version_ppc.cpp

Print this page
rev 13437 : Implement SHA256 and SHA512 intrinsics for PPC64

A sample SHA256 run was reduced from 6.5s to 2.8s.


  96 
  97 #ifdef COMPILER2
  98   if (!UseSIGTRAP) {
  99     MSG(TrapBasedRangeChecks);
 100     FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
 101   }
 102 
 103   // On Power6 test for section size.
 104   if (PowerArchitecturePPC64 == 6) {
 105     determine_section_size();
 106   // TODO: PPC port } else {
 107   // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
 108   }
 109 
 110   MaxVectorSize = 8;
 111 #endif
 112 
 113   // Create and print feature-string.
 114   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
 115   jio_snprintf(buf, sizeof(buf),
 116                "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 117                (has_fsqrt()   ? " fsqrt"   : ""),
 118                (has_isel()    ? " isel"    : ""),
 119                (has_lxarxeh() ? " lxarxeh" : ""),
 120                (has_cmpb()    ? " cmpb"    : ""),
 121                //(has_mftgpr()? " mftgpr"  : ""),
 122                (has_popcntb() ? " popcntb" : ""),
 123                (has_popcntw() ? " popcntw" : ""),
 124                (has_fcfids()  ? " fcfids"  : ""),
 125                (has_vand()    ? " vand"    : ""),
 126                (has_lqarx()   ? " lqarx"   : ""),
 127                (has_vcipher() ? " aes"     : ""),
 128                (has_vpmsumb() ? " vpmsumb" : ""),
 129                (has_tcheck()  ? " tcheck"  : ""),
 130                (has_mfdscr()  ? " mfdscr"  : ""),
 131                (has_vsx()     ? " vsx"     : ""),
 132                (has_ldbrx()   ? " ldbrx"   : ""),
 133                (has_stdbrx()  ? " stdbrx"  : "")

 134                // Make sure number of %s matches num_features!
 135               );
 136   _features_string = os::strdup(buf);
 137   if (Verbose) {
 138     print_features();
 139   }
 140 
 141   // PPC64 supports 8-byte compare-exchange operations (see
 142   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
 143   // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
 144   _supports_cx8 = true;
 145 
 146   // Used by C1.
 147   _supports_atomic_getset4 = true;
 148   _supports_atomic_getadd4 = true;
 149   _supports_atomic_getset8 = true;
 150   _supports_atomic_getadd8 = true;
 151 
 152   UseSSE = 0; // Only on x86 and x64
 153 


 230     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 231       warning("AES intrinsics are not available on this CPU");
 232     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 233   }
 234 #endif
 235 
 236   if (UseAESCTRIntrinsics) {
 237     warning("AES/CTR intrinsics are not available on this CPU");
 238     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
 239   }
 240 
 241   if (UseGHASHIntrinsics) {
 242     warning("GHASH intrinsics are not available on this CPU");
 243     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
 244   }
 245 
 246   if (FLAG_IS_DEFAULT(UseFMA)) {
 247     FLAG_SET_DEFAULT(UseFMA, true);
 248   }
 249 






































 250   if (UseSHA) {
 251     warning("SHA instructions are not available on this CPU");
 252     FLAG_SET_DEFAULT(UseSHA, false);
 253   }
 254   if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
 255     warning("SHA intrinsics are not available on this CPU");
 256     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
 257     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
 258     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
 259   }

 260 
 261   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
 262     UseMultiplyToLenIntrinsic = true;
 263   }
 264   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
 265     UseMontgomeryMultiplyIntrinsic = true;
 266   }
 267   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
 268     UseMontgomerySquareIntrinsic = true;
 269   }
 270 
 271   if (UseVectorizedMismatchIntrinsic) {
 272     warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
 273     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
 274   }
 275 
 276 
 277   // Adjust RTM (Restricted Transactional Memory) flags.
 278   if (UseRTMLocking) {
 279     // If CPU or OS are too old:


 640   // Keep R3_ARG1 unmodified, it contains &field (see below).
 641   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
 642   a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
 643   a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
 644   a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
 645   a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
 646   a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
 647   a->popcntb(R7, R5);                          // code[5]  -> popcntb
 648   a->popcntw(R7, R5);                          // code[6]  -> popcntw
 649   a->fcfids(F3, F4);                           // code[7]  -> fcfids
 650   a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
 651   // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
 652   a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
 653   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
 654   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
 655   a->tcheck(0);                                // code[12] -> tcheck
 656   a->mfdscr(R0);                               // code[13] -> mfdscr
 657   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
 658   a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
 659   a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx

 660   a->blr();
 661 
 662   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
 663   void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
 664   a->dcbz(R3_ARG1); // R3_ARG1 = addr
 665   a->blr();
 666 
 667   uint32_t *code_end = (uint32_t *)a->pc();
 668   a->flush();
 669   _features = VM_Version::unknown_m;
 670 
 671   // Print the detection code.
 672   if (PrintAssembly) {
 673     ttyLocker ttyl;
 674     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
 675     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 676   }
 677 
 678   // Measure cache line size.
 679   memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.


 691 
 692   // determine which instructions are legal.
 693   int feature_cntr = 0;
 694   if (code[feature_cntr++]) features |= fsqrt_m;
 695   if (code[feature_cntr++]) features |= fsqrts_m;
 696   if (code[feature_cntr++]) features |= isel_m;
 697   if (code[feature_cntr++]) features |= lxarxeh_m;
 698   if (code[feature_cntr++]) features |= cmpb_m;
 699   if (code[feature_cntr++]) features |= popcntb_m;
 700   if (code[feature_cntr++]) features |= popcntw_m;
 701   if (code[feature_cntr++]) features |= fcfids_m;
 702   if (code[feature_cntr++]) features |= vand_m;
 703   if (code[feature_cntr++]) features |= lqarx_m;
 704   if (code[feature_cntr++]) features |= vcipher_m;
 705   if (code[feature_cntr++]) features |= vpmsumb_m;
 706   if (code[feature_cntr++]) features |= tcheck_m;
 707   if (code[feature_cntr++]) features |= mfdscr_m;
 708   if (code[feature_cntr++]) features |= vsx_m;
 709   if (code[feature_cntr++]) features |= ldbrx_m;
 710   if (code[feature_cntr++]) features |= stdbrx_m;

 711 
 712   // Print the detection code.
 713   if (PrintAssembly) {
 714     ttyLocker ttyl;
 715     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
 716     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 717   }
 718 
 719   _features = features;
 720 }
 721 
 722 // Power 8: Configure Data Stream Control Register.
 723 void VM_Version::config_dscr() {
 724   // 7 InstWords for each call (function descriptor + blr instruction).
 725   const int code_size = (2+2*7)*BytesPerInstWord;
 726 
 727   // Allocate space for the code.
 728   ResourceMark rm;
 729   CodeBuffer cb("config_dscr", code_size, 0);
 730   MacroAssembler* a = new MacroAssembler(&cb);




  96 
  97 #ifdef COMPILER2
  98   if (!UseSIGTRAP) {
  99     MSG(TrapBasedRangeChecks);
 100     FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
 101   }
 102 
 103   // On Power6 test for section size.
 104   if (PowerArchitecturePPC64 == 6) {
 105     determine_section_size();
 106   // TODO: PPC port } else {
 107   // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
 108   }
 109 
 110   MaxVectorSize = 8;
 111 #endif
 112 
 113   // Create and print feature-string.
 114   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
 115   jio_snprintf(buf, sizeof(buf),
 116                "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 117                (has_fsqrt()   ? " fsqrt"   : ""),
 118                (has_isel()    ? " isel"    : ""),
 119                (has_lxarxeh() ? " lxarxeh" : ""),
 120                (has_cmpb()    ? " cmpb"    : ""),
 121                //(has_mftgpr()? " mftgpr"  : ""),
 122                (has_popcntb() ? " popcntb" : ""),
 123                (has_popcntw() ? " popcntw" : ""),
 124                (has_fcfids()  ? " fcfids"  : ""),
 125                (has_vand()    ? " vand"    : ""),
 126                (has_lqarx()   ? " lqarx"   : ""),
 127                (has_vcipher() ? " aes"     : ""),
 128                (has_vpmsumb() ? " vpmsumb" : ""),
 129                (has_tcheck()  ? " tcheck"  : ""),
 130                (has_mfdscr()  ? " mfdscr"  : ""),
 131                (has_vsx()     ? " vsx"     : ""),
 132                (has_ldbrx()   ? " ldbrx"   : ""),
 133                (has_stdbrx()  ? " stdbrx"  : ""),
 134                (has_vshasig() ? " sha"     : "")
 135                // Make sure number of %s matches num_features!
 136               );
 137   _features_string = os::strdup(buf);
 138   if (Verbose) {
 139     print_features();
 140   }
 141 
 142   // PPC64 supports 8-byte compare-exchange operations (see
 143   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
 144   // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
 145   _supports_cx8 = true;
 146 
 147   // Used by C1.
 148   _supports_atomic_getset4 = true;
 149   _supports_atomic_getadd4 = true;
 150   _supports_atomic_getset8 = true;
 151   _supports_atomic_getadd8 = true;
 152 
 153   UseSSE = 0; // Only on x86 and x64
 154 


 231     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 232       warning("AES intrinsics are not available on this CPU");
 233     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 234   }
 235 #endif
 236 
 237   if (UseAESCTRIntrinsics) {
 238     warning("AES/CTR intrinsics are not available on this CPU");
 239     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
 240   }
 241 
 242   if (UseGHASHIntrinsics) {
 243     warning("GHASH intrinsics are not available on this CPU");
 244     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
 245   }
 246 
 247   if (FLAG_IS_DEFAULT(UseFMA)) {
 248     FLAG_SET_DEFAULT(UseFMA, true);
 249   }
 250 
 251 #if defined(VM_LITTLE_ENDIAN)
 252   if (has_vshasig()) {
 253     if (FLAG_IS_DEFAULT(UseSHA)) {
 254       UseSHA = true;
 255     }
 256   } else if (UseSHA) {
 257     if (!FLAG_IS_DEFAULT(UseSHA))
 258       warning("SHA instructions are not available on this CPU");
 259     FLAG_SET_DEFAULT(UseSHA, false);
 260   }
 261 
 262   if (UseSHA1Intrinsics) {
 263     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
 264     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
 265   }
 266 
 267   if (UseSHA && has_vshasig()) {
 268     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
 269       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
 270     }
 271   } else if (UseSHA256Intrinsics) {
 272     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
 273     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
 274   }
 275 
 276   if (UseSHA && has_vshasig()) {
 277     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
 278       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
 279     }
 280   } else if (UseSHA512Intrinsics) {
 281     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
 282     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
 283   }
 284 
 285   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
 286     FLAG_SET_DEFAULT(UseSHA, false);
 287   }
 288 #else
 289   if (UseSHA) {
 290     warning("SHA instructions are not available on this CPU");
 291     FLAG_SET_DEFAULT(UseSHA, false);
 292   }
 293   if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
 294     warning("SHA intrinsics are not available on this CPU");
 295     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
 296     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
 297     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
 298   }
 299 #endif
 300 
 301   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
 302     UseMultiplyToLenIntrinsic = true;
 303   }
 304   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
 305     UseMontgomeryMultiplyIntrinsic = true;
 306   }
 307   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
 308     UseMontgomerySquareIntrinsic = true;
 309   }
 310 
 311   if (UseVectorizedMismatchIntrinsic) {
 312     warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
 313     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
 314   }
 315 
 316 
 317   // Adjust RTM (Restricted Transactional Memory) flags.
 318   if (UseRTMLocking) {
 319     // If CPU or OS are too old:


 680   // Keep R3_ARG1 unmodified, it contains &field (see below).
 681   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
 682   a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
 683   a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
 684   a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
 685   a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
 686   a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
 687   a->popcntb(R7, R5);                          // code[5]  -> popcntb
 688   a->popcntw(R7, R5);                          // code[6]  -> popcntw
 689   a->fcfids(F3, F4);                           // code[7]  -> fcfids
 690   a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
 691   // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
 692   a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
 693   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
 694   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
 695   a->tcheck(0);                                // code[12] -> tcheck
 696   a->mfdscr(R0);                               // code[13] -> mfdscr
 697   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
 698   a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
 699   a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
 700   a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[17] -> vshasig
 701   a->blr();
 702 
 703   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
 704   void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
 705   a->dcbz(R3_ARG1); // R3_ARG1 = addr
 706   a->blr();
 707 
 708   uint32_t *code_end = (uint32_t *)a->pc();
 709   a->flush();
 710   _features = VM_Version::unknown_m;
 711 
 712   // Print the detection code.
 713   if (PrintAssembly) {
 714     ttyLocker ttyl;
 715     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
 716     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 717   }
 718 
 719   // Measure cache line size.
 720   memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.


 732 
 733   // determine which instructions are legal.
 734   int feature_cntr = 0;
 735   if (code[feature_cntr++]) features |= fsqrt_m;
 736   if (code[feature_cntr++]) features |= fsqrts_m;
 737   if (code[feature_cntr++]) features |= isel_m;
 738   if (code[feature_cntr++]) features |= lxarxeh_m;
 739   if (code[feature_cntr++]) features |= cmpb_m;
 740   if (code[feature_cntr++]) features |= popcntb_m;
 741   if (code[feature_cntr++]) features |= popcntw_m;
 742   if (code[feature_cntr++]) features |= fcfids_m;
 743   if (code[feature_cntr++]) features |= vand_m;
 744   if (code[feature_cntr++]) features |= lqarx_m;
 745   if (code[feature_cntr++]) features |= vcipher_m;
 746   if (code[feature_cntr++]) features |= vpmsumb_m;
 747   if (code[feature_cntr++]) features |= tcheck_m;
 748   if (code[feature_cntr++]) features |= mfdscr_m;
 749   if (code[feature_cntr++]) features |= vsx_m;
 750   if (code[feature_cntr++]) features |= ldbrx_m;
 751   if (code[feature_cntr++]) features |= stdbrx_m;
 752   if (code[feature_cntr++]) features |= vshasig_m;
 753 
 754   // Print the detection code.
 755   if (PrintAssembly) {
 756     ttyLocker ttyl;
 757     tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
 758     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
 759   }
 760 
 761   _features = features;
 762 }
 763 
 764 // Power 8: Configure Data Stream Control Register.
 765 void VM_Version::config_dscr() {
 766   // 7 InstWords for each call (function descriptor + blr instruction).
 767   const int code_size = (2+2*7)*BytesPerInstWord;
 768 
 769   // Allocate space for the code.
 770   ResourceMark rm;
 771   CodeBuffer cb("config_dscr", code_size, 0);
 772   MacroAssembler* a = new MacroAssembler(&cb);


< prev index next >