1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2017, SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "compiler/disassembler.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "prims/jvm.h"
32 #include "runtime/java.hpp"
33 #include "runtime/os.hpp"
34 #include "runtime/stubCodeGenerator.hpp"
35 #include "utilities/align.hpp"
36 #include "utilities/defaultStream.hpp"
37 #include "utilities/globalDefinitions.hpp"
38 #include "vm_version_ppc.hpp"
39
40 # include <sys/sysinfo.h>
41
// True only while determine_features() executes its probe stub; the platform
// signal handler checks this to know that illegal-instruction signals are
// expected and should be patched over (see determine_features()).
bool VM_Version::_is_determine_features_test_running = false;
// Data Stream Control Register value; read from hardware and possibly
// rewritten in config_dscr().
uint64_t VM_Version::_dscr_val = 0;
44
// Warn that 'flag' will be disabled because it requires -XX:+UseSIGTRAP.
// Warns only if the flag is enabled and was set explicitly (not at its
// default). Wrapped in do { } while (0) so the expansion is a single
// statement and cannot produce a dangling-else when used in an unbraced
// if/else at the call site.
#define MSG(flag) \
  do { \
    if (flag && !FLAG_IS_DEFAULT(flag)) \
      jio_fprintf(defaultStream::error_stream(), \
                  "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n" \
                  " -XX:+" #flag " will be disabled!\n"); \
  } while (0)
50
// Probe the CPU, derive PowerArchitecturePPC64 if not given, configure the
// DSCR, and set the platform's ergonomic defaults (trap-based checks,
// prefetch tuning, intrinsics, RTM). Runs once during VM startup.
void VM_Version::initialize() {

  // Test which instructions are supported and measure cache line size.
  determine_features();

  // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
  if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
    if (VM_Version::has_lqarx()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
    } else if (VM_Version::has_popcntw()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
    } else if (VM_Version::has_cmpb()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
    } else if (VM_Version::has_popcntb()) {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 5);
    } else {
      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
    }
  }

  // Verify that the (possibly user-specified) level is consistent with the
  // detected features. The cases fall through intentionally: a level is only
  // "ok" when its own marker instruction and those of all lower levels exist.
  bool PowerArchitecturePPC64_ok = false;
  switch (PowerArchitecturePPC64) {
    case 8: if (!VM_Version::has_lqarx()  ) break; // fall through
    case 7: if (!VM_Version::has_popcntw()) break; // fall through
    case 6: if (!VM_Version::has_cmpb()   ) break; // fall through
    case 5: if (!VM_Version::has_popcntb()) break; // fall through
    case 0: PowerArchitecturePPC64_ok = true; break;
    default: break;
  }
  guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot be set to "
            UINTX_FORMAT " on this machine", PowerArchitecturePPC64);

  // Power 8: Configure Data Stream Control Register.
  if (PowerArchitecturePPC64 >= 8 && has_mfdscr()) {
    config_dscr();
  }

  // Trap-based checks need SIGTRAP support; warn (MSG) about explicitly
  // requested ones, then disable them all.
  if (!UseSIGTRAP) {
    MSG(TrapBasedICMissChecks);
    MSG(TrapBasedNotEntrantChecks);
    MSG(TrapBasedNullChecks);
    FLAG_SET_ERGO(bool, TrapBasedNotEntrantChecks, false);
    FLAG_SET_ERGO(bool, TrapBasedNullChecks, false);
    FLAG_SET_ERGO(bool, TrapBasedICMissChecks, false);
  }

#ifdef COMPILER2
  if (!UseSIGTRAP) {
    MSG(TrapBasedRangeChecks);
    FLAG_SET_ERGO(bool, TrapBasedRangeChecks, false);
  }

  // On Power6 test for section size.
  if (PowerArchitecturePPC64 == 6) {
    determine_section_size();
  // TODO: PPC port } else {
  // TODO: PPC port PdScheduling::power6SectorSize = 0x20;
  }

  MaxVectorSize = 8;
#endif

  // Create and print feature-string.
  char buf[(num_features+1) * 16]; // Max 16 chars per feature.
  jio_snprintf(buf, sizeof(buf),
               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               (has_fsqrt()   ? " fsqrt"   : ""),
               (has_isel()    ? " isel"    : ""),
               (has_lxarxeh() ? " lxarxeh" : ""),
               (has_cmpb()    ? " cmpb"    : ""),
               //(has_mftgpr()? " mftgpr"  : ""),
               (has_popcntb() ? " popcntb" : ""),
               (has_popcntw() ? " popcntw" : ""),
               (has_fcfids()  ? " fcfids"  : ""),
               (has_vand()    ? " vand"    : ""),
               (has_lqarx()   ? " lqarx"   : ""),
               (has_vcipher() ? " aes"     : ""),
               (has_vpmsumb() ? " vpmsumb" : ""),
               (has_tcheck()  ? " tcheck"  : ""),
               (has_mfdscr()  ? " mfdscr"  : ""),
               (has_vsx()     ? " vsx"     : ""),
               (has_ldbrx()   ? " ldbrx"   : ""),
               (has_stdbrx()  ? " stdbrx"  : ""),
               (has_vshasig() ? " sha"     : "")
               // Make sure number of %s matches num_features!
              );
  _features_string = os::strdup(buf);
  if (Verbose) {
    print_features();
  }

  // PPC64 supports 8-byte compare-exchange operations (see
  // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
  // and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
  _supports_cx8 = true;

  // Used by C1.
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  UseSSE = 0; // Only on x86 and x64

  // Prefetch tuning is based on the measured L1 data cache line size.
  intx cache_line_size = L1_data_cache_line_size();

  if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;

  if (AllocatePrefetchStyle == 4) {
    AllocatePrefetchStepSize = cache_line_size; // Need exact value.
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 12; // Use larger blocks by default.
    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 2*cache_line_size; // Default is not defined?
  } else {
    if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size;
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 3; // Optimistic value.
    if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3*cache_line_size; // Default is not defined?
  }

  assert(AllocatePrefetchLines > 0, "invalid value");
  if (AllocatePrefetchLines < 1) { // Set valid value in product VM.
    AllocatePrefetchLines = 1; // Conservative value.
  }

  if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) {
    AllocatePrefetchStyle = 1; // Fall back if inappropriate.
  }

  // NOTE(review): message says "positive" but the check allows 0 — the
  // assert itself only rejects negative values.
  assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");

  // If defined(VM_LITTLE_ENDIAN) and running on Power8 or newer hardware,
  // the implementation uses the vector instructions available with Power8.
  // In all other cases, the implementation uses only generally available instructions.
  if (!UseCRC32Intrinsics) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
    }
  }

  // Implementation does not use any of the vector instructions available with Power8.
  // Their exploitation is still pending (aka "work in progress").
  if (!UseCRC32CIntrinsics) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
    }
  }

  // TODO: Provide implementation.
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  // The AES intrinsic stubs require AES instruction support.
#if defined(VM_LITTLE_ENDIAN)
  if (has_vcipher()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      UseAES = true;
    }
  } else if (UseAES) {
    if (!FLAG_IS_DEFAULT(UseAES))
      warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }

  if (UseAES && has_vcipher()) {
    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      UseAESIntrinsics = true;
    }
  } else if (UseAESIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
      warning("AES intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
  }

#else
  // Big endian: no AES support at all.
  if (UseAES) {
    warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }
  if (UseAESIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
      warning("AES intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
  }
#endif

  if (UseAESCTRIntrinsics) {
    warning("AES/CTR intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
  }

  if (UseGHASHIntrinsics) {
    warning("GHASH intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseFMA)) {
    FLAG_SET_DEFAULT(UseFMA, true);
  }

#if defined(VM_LITTLE_ENDIAN)
  // SHA intrinsics require the vshasigma* instructions (Power8).
  if (has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    if (!FLAG_IS_DEFAULT(UseSHA))
      warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  // No SHA-1 implementation on PPC64.
  if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (UseSHA && has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && has_vshasig()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  // Keep UseSHA consistent: off when no SHA intrinsic is enabled.
  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }
#else
  if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }
  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
    warning("SHA intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }
#endif

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }

  if (UseVectorizedMismatchIntrinsic) {
    warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }


  // Adjust RTM (Restricted Transactional Memory) flags.
  if (UseRTMLocking) {
    // If CPU or OS are too old:
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    // VM_Version_init() is executed after UseBiasedLocking is used
    // in Thread::allocate().
    if (!has_tcheck()) {
      vm_exit_during_initialization("RTM instructions are not available on this CPU");
    }
    bool os_too_old = true;
#ifdef AIX
    // Actually, this is supported since AIX 7.1.. Unfortunately, this first
    // contained bugs, so that it can only be enabled after AIX 7.1.3.30.
    // The Java property os.version, which is used in RTM tests to decide
    // whether the feature is available, only knows major and minor versions.
    // We don't want to change this property, as user code might depend on it.
    // So the tests can not check on subversion 3.30, and we only enable RTM
    // with AIX 7.2.
    if (os::Aix::os_version() >= 0x07020000) { // At least AIX 7.2.
      os_too_old = false;
    }
#endif
#ifdef LINUX
    // At least Linux kernel 4.2, as the problematic behavior of syscalls
    // being called in the middle of a transaction has been addressed.
    // Please, refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894
    // in Linux kernel source tree: https://goo.gl/Kc5i7A
    if (os::Linux::os_version_is_known()) {
      if (os::Linux::os_version() >= 0x040200)
        os_too_old = false;
    } else {
      vm_exit_during_initialization("RTM can not be enabled: kernel version is unknown.");
    }
#endif
    if (os_too_old) {
      vm_exit_during_initialization("RTM is not supported on this OS version.");
    }
  }

  if (UseRTMLocking) {
#if INCLUDE_RTM_OPT
    if (!UnlockExperimentalVMOptions) {
      vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
                                    "It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
    } else {
      warning("UseRTMLocking is only available as experimental option on this platform.");
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
#else
    // Only C2 does RTM locking optimization.
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
#endif
  } else { // !UseRTMLocking
    // With RTM locking off, all dependent RTM flags must be off as well.
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }
}
396
397 bool VM_Version::use_biased_locking() {
398 #if INCLUDE_RTM_OPT
399 // RTM locking is most useful when there is high lock contention and
400 // low data contention. With high lock contention the lock is usually
401 // inflated and biased locking is not suitable for that case.
402 // RTM locking code requires that biased locking is off.
403 // Note: we can't switch off UseBiasedLocking in get_processor_features()
404 // because it is used by Thread::allocate() which is called before
405 // VM_Version::initialize().
406 if (UseRTMLocking && UseBiasedLocking) {
407 if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
408 FLAG_SET_DEFAULT(UseBiasedLocking, false);
409 } else {
410 warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
411 UseBiasedLocking = false;
412 }
413 }
414 #endif
415 return UseBiasedLocking;
416 }
417
418 void VM_Version::print_features() {
419 tty->print_cr("Version: %s L1_data_cache_line_size=%d", features_string(), L1_data_cache_line_size());
420 }
421
422 #ifdef COMPILER2
// Determine section size on power6: If section size is 8 instructions,
// there should be a difference between the two testloops of ~15 %. If
// no difference is detected the section is assumed to be 32 instructions.
// Emits two hand-scheduled loops with identical instruction mix but
// different endgroup placement, times both, and compares the runtimes.
void VM_Version::determine_section_size() {

  int unroll = 80;

  const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord;

  // Allocate space for the code.
  ResourceMark rm;
  CodeBuffer cb("detect_section_size", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);

  uint32_t *code = (uint32_t *)a->pc();
  // Emit code.
  void (*test1)() = (void(*)())(void *)a->function_entry();

  Label l1;

  // Loop counter: R4 = 1 << 28 iterations, decremented in the loop body.
  a->li(R4, 1);
  a->sldi(R4, R4, 28);
  a->b(l1);
  a->align(CodeEntryAlignment);

  a->bind(l1);

  for (int i = 0; i < unroll; i++) {
    // Loop 1 (test1)
    // ------- sector 0 ------------
    // ;; 0
    a->nop();                   // 1
    a->fpnop0();                // 2
    a->fpnop1();                // 3
    a->addi(R4,R4, -1);         // 4

    // ;; 1
    a->nop();                   // 5
    a->fmr(F6, F6);             // 6
    a->fmr(F7, F7);             // 7
    a->endgroup();              // 8
    // ------- sector 8 ------------

    // ;; 2
    a->nop();                   // 9
    a->nop();                   // 10
    a->fmr(F8, F8);             // 11
    a->fmr(F9, F9);             // 12

    // ;; 3
    a->nop();                   // 13
    a->fmr(F10, F10);           // 14
    a->fmr(F11, F11);           // 15
    a->endgroup();              // 16
    // -------- sector 16 -------------

    // ;; 4
    a->nop();                   // 17
    a->nop();                   // 18
    a->fmr(F15, F15);           // 19
    a->fmr(F16, F16);           // 20

    // ;; 5
    a->nop();                   // 21
    a->fmr(F17, F17);           // 22
    a->fmr(F18, F18);           // 23
    a->endgroup();              // 24
    // ------- sector 24 ------------

    // ;; 6
    a->nop();                   // 25
    a->nop();                   // 26
    a->fmr(F19, F19);           // 27
    a->fmr(F20, F20);           // 28

    // ;; 7
    a->nop();                   // 29
    a->fmr(F21, F21);           // 30
    a->fmr(F22, F22);           // 31
    a->brnop0();                // 32

    // ------- sector 32 ------------
  }

  // ;; 8
  a->cmpdi(CCR0, R4, unroll);   // 33
  a->bge(CCR0, l1);             // 34
  a->blr();

  // Emit code.
  void (*test2)() = (void(*)())(void *)a->function_entry();
  // uint32_t *code = (uint32_t *)a->pc();

  Label l2;

  a->li(R4, 1);
  a->sldi(R4, R4, 28);
  a->b(l2);
  a->align(CodeEntryAlignment);

  a->bind(l2);

  for (int i = 0; i < unroll; i++) {
    // Loop 2 (test2)
    // ------- sector 0 ------------
    // ;; 0
    a->brnop0();                // 1
    a->nop();                   // 2
    //a->cmpdi(CCR0, R4, unroll);
    a->fpnop0();                // 3
    a->fpnop1();                // 4
    a->addi(R4,R4, -1);         // 5

    // ;; 1

    a->nop();                   // 6
    a->fmr(F6, F6);             // 7
    a->fmr(F7, F7);             // 8
    // ------- sector 8 ---------------

    // ;; 2
    a->endgroup();              // 9

    // ;; 3
    a->nop();                   // 10
    a->nop();                   // 11
    a->fmr(F8, F8);             // 12

    // ;; 4
    a->fmr(F9, F9);             // 13
    a->nop();                   // 14
    a->fmr(F10, F10);           // 15

    // ;; 5
    a->fmr(F11, F11);           // 16
    // -------- sector 16 -------------

    // ;; 6
    a->endgroup();              // 17

    // ;; 7
    a->nop();                   // 18
    a->nop();                   // 19
    a->fmr(F15, F15);           // 20

    // ;; 8
    a->fmr(F16, F16);           // 21
    a->nop();                   // 22
    a->fmr(F17, F17);           // 23

    // ;; 9
    a->fmr(F18, F18);           // 24
    // -------- sector 24 -------------

    // ;; 10
    a->endgroup();              // 25

    // ;; 11
    a->nop();                   // 26
    a->nop();                   // 27
    a->fmr(F19, F19);           // 28

    // ;; 12
    a->fmr(F20, F20);           // 29
    a->nop();                   // 30
    a->fmr(F21, F21);           // 31

    // ;; 13
    a->fmr(F22, F22);           // 32
  }

  // -------- sector 32 -------------
  // ;; 14
  a->cmpdi(CCR0, R4, unroll);   // 33
  a->bge(CCR0, l2);             // 34

  a->blr();
  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();

  // Time both loops with thread CPU time and compute the relative
  // difference in percent.
  double loop1_seconds,loop2_seconds, rel_diff;
  uint64_t start1, stop1;

  start1 = os::current_thread_cpu_time(false);
  (*test1)();
  stop1 = os::current_thread_cpu_time(false);
  loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0);


  start1 = os::current_thread_cpu_time(false);
  (*test2)();
  stop1 = os::current_thread_cpu_time(false);

  loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0);

  rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100;

  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
    tty->print_cr("Time loop1 :%f", loop1_seconds);
    tty->print_cr("Time loop2 :%f", loop2_seconds);
    tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff);

    if (rel_diff > 12.0) {
      tty->print_cr("Section Size 8 Instructions");
    } else{
      tty->print_cr("Section Size 32 Instructions or Power5");
    }
  }

#if 0 // TODO: PPC port
  // Set sector size (if not set explicitly).
  if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) {
    if (rel_diff > 12.0) {
      PdScheduling::power6SectorSize = 0x20;
    } else {
      PdScheduling::power6SectorSize = 0x80;
    }
  } else if (Power6SectorSize128PPC64) {
    PdScheduling::power6SectorSize = 0x80;
  } else {
    PdScheduling::power6SectorSize = 0x20;
  }
#endif
  if (UsePower6SchedulerPPC64) Unimplemented();
}
651 #endif // COMPILER2
652
// Probe the CPU: emit one instance of each optional instruction, execute the
// stub while _is_determine_features_test_running is set (illegal instructions
// are replaced by 0 in the signal handler, see below), then record which
// instruction words survived. Also measures the L1 data cache line size by
// executing dcbz on a prepared buffer and counting the zeroed bytes.
void VM_Version::determine_features() {
#if defined(ABI_ELFv2)
  // 1 InstWord per call for the blr instruction.
  const int code_size = (num_features+1+2*1)*BytesPerInstWord;
#else
  // 7 InstWords for each call (function descriptor + blr instruction).
  const int code_size = (num_features+1+2*7)*BytesPerInstWord;
#endif
  int features = 0;

  // create test area
  enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size).
  char test_area[BUFFER_SIZE];
  char *mid_of_test_area = &test_area[BUFFER_SIZE>>1];

  // Allocate space for the code.
  ResourceMark rm;
  CodeBuffer cb("detect_cpu_features", code_size, 0);
  MacroAssembler* a = new MacroAssembler(&cb);

  // Must be set to true so we can generate the test code.
  _features = VM_Version::all_features_m;

  // Emit code.
  void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry();
  uint32_t *code = (uint32_t *)a->pc();
  // Don't use R0 in ldarx.
  // Keep R3_ARG1 unmodified, it contains &field (see below).
  // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
  a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
  a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
  a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
  a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
  a->popcntb(R7, R5);                          // code[5]  -> popcntb
  a->popcntw(R7, R5);                          // code[6]  -> popcntw
  a->fcfids(F3, F4);                           // code[7]  -> fcfids
  a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
  // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
  a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
  a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
  a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
  a->tcheck(0);                                // code[12] -> tcheck
  a->mfdscr(R0);                               // code[13] -> mfdscr
  a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
  a->ldbrx(R7, R3_ARG1, R4_ARG2);              // code[15] -> ldbrx
  a->stdbrx(R7, R3_ARG1, R4_ARG2);             // code[16] -> stdbrx
  a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[17] -> vshasig
  a->blr();

  // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
  void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry();
  a->dcbz(R3_ARG1); // R3_ARG1 = addr
  a->blr();

  uint32_t *code_end = (uint32_t *)a->pc();
  a->flush();
  _features = VM_Version::unknown_m;

  // Print the detection code.
  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
  }

  // Measure cache line size.
  memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF.
  (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle.
  int count = 0; // count zeroed bytes
  for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
  guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
  _L1_data_cache_line_size = count;

  // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
  VM_Version::_is_determine_features_test_running = true;
  // We must align the first argument to 16 bytes because of the lqarx check.
  (*test)(align_up((address)mid_of_test_area, 16), 0);
  VM_Version::_is_determine_features_test_running = false;

  // determine which instructions are legal.
  // A nonzero word means the instruction survived (was not nulled out by the
  // signal handler), i.e. the CPU supports it. Order must match the emission
  // order above.
  int feature_cntr = 0;
  if (code[feature_cntr++]) features |= fsqrt_m;
  if (code[feature_cntr++]) features |= fsqrts_m;
  if (code[feature_cntr++]) features |= isel_m;
  if (code[feature_cntr++]) features |= lxarxeh_m;
  if (code[feature_cntr++]) features |= cmpb_m;
  if (code[feature_cntr++]) features |= popcntb_m;
  if (code[feature_cntr++]) features |= popcntw_m;
  if (code[feature_cntr++]) features |= fcfids_m;
  if (code[feature_cntr++]) features |= vand_m;
  if (code[feature_cntr++]) features |= lqarx_m;
  if (code[feature_cntr++]) features |= vcipher_m;
  if (code[feature_cntr++]) features |= vpmsumb_m;
  if (code[feature_cntr++]) features |= tcheck_m;
  if (code[feature_cntr++]) features |= mfdscr_m;
  if (code[feature_cntr++]) features |= vsx_m;
  if (code[feature_cntr++]) features |= ldbrx_m;
  if (code[feature_cntr++]) features |= stdbrx_m;
  if (code[feature_cntr++]) features |= vshasig_m;

  // Print the detection code.
  if (PrintAssembly) {
    ttyLocker ttyl;
    tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code));
    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
  }

  _features = features;
}
763
764 // Power 8: Configure Data Stream Control Register.
765 void VM_Version::config_dscr() {
766 // 7 InstWords for each call (function descriptor + blr instruction).
767 const int code_size = (2+2*7)*BytesPerInstWord;
768
769 // Allocate space for the code.
770 ResourceMark rm;
771 CodeBuffer cb("config_dscr", code_size, 0);
772 MacroAssembler* a = new MacroAssembler(&cb);
773
774 // Emit code.
775 uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry();
776 uint32_t *code = (uint32_t *)a->pc();
777 a->mfdscr(R3);
778 a->blr();
779
780 void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry();
781 a->mtdscr(R3);
782 a->blr();
783
784 uint32_t *code_end = (uint32_t *)a->pc();
785 a->flush();
786
787 // Print the detection code.
788 if (PrintAssembly) {
789 ttyLocker ttyl;
790 tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code));
791 Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
792 }
793
794 // Apply the configuration if needed.
795 _dscr_val = (*get_dscr)();
796 if (Verbose) {
797 tty->print_cr("dscr value was 0x%lx" , _dscr_val);
798 }
799 bool change_requested = false;
800 if (DSCR_PPC64 != (uintx)-1) {
801 _dscr_val = DSCR_PPC64;
802 change_requested = true;
803 }
804 if (DSCR_DPFD_PPC64 <= 7) {
805 uint64_t mask = 0x7;
806 if ((_dscr_val & mask) != DSCR_DPFD_PPC64) {
807 _dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64);
808 change_requested = true;
809 }
810 }
811 if (DSCR_URG_PPC64 <= 7) {
812 uint64_t mask = 0x7 << 6;
813 if ((_dscr_val & mask) != DSCR_DPFD_PPC64 << 6) {
814 _dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
815 change_requested = true;
816 }
817 }
818 if (change_requested) {
819 (*set_dscr)(_dscr_val);
820 if (Verbose) {
821 tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
822 }
823 }
824 }
825
// Feature set saved by allow_all() so revert() can restore it.
static uint64_t saved_features = 0;

// Temporarily enable all features (e.g. so the assembler accepts every
// instruction, as done during feature detection). Pair with revert().
void VM_Version::allow_all() {
  saved_features = _features;
  _features = all_features_m;
}

// Restore the feature set saved by the last allow_all() call.
void VM_Version::revert() {
  _features = saved_features;
}