jdk10-hotspot-hg Sdiff src/cpu/ppc/vm

src/cpu/ppc/vm/macroAssembler_ppc.hpp

rev 13389 : PPC: Implement MulAdd and SquareToLen intrinsics

This implementation is based on the algorithm implemented in Java. It
yields a performance speedup of:
JDK8 - 23%
JDK9 - 5%
JDK10 - 5%

 795 #endif
 796 
 797   // Emitters for BigInteger.multiplyToLen intrinsic.
 798   inline void multiply64(Register dest_hi, Register dest_lo,
 799                          Register x, Register y);
 800   void add2_with_carry(Register dest_hi, Register dest_lo,
 801                        Register src1, Register src2);
 802   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
 803                              Register y, Register y_idx, Register z,
 804                              Register carry, Register product_high, Register product,
 805                              Register idx, Register kdx, Register tmp);
 806   void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
 807                               Register yz_idx, Register idx, Register carry,
 808                               Register product_high, Register product, Register tmp,
 809                               int offset);
 810   void multiply_128_x_128_loop(Register x_xstart,
 811                                Register y, Register z,
 812                                Register yz_idx, Register idx, Register carry,
 813                                Register product_high, Register product,
 814                                Register carry2, Register tmp);


 815   void multiply_to_len(Register x, Register xlen,
 816                        Register y, Register ylen,
 817                        Register z, Register zlen,
 818                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
 819                        Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
 820                        Register tmp11, Register tmp12, Register tmp13);
 821 
 822   // Emitters for CRC32 calculation.
 823   // A note on invertCRC:
 824   //   Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
 825   //   CRC32 holds its current crc value in the externally visible representation.
 826   //   CRC32C holds its current crc value in internal format, ready for updating.
 827   //   Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
 828   //   In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
 829   //   The bool invertCRC parameter indicates whether bit-flipping is required before updates.
 830   void load_reverse_32(Register dst, Register src);
 831   int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
 832   void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
 833   void fold_8bit_crc32(Register crc, Register table, Register tmp);
 834   void update_byte_crc32(Register crc, Register val, Register table);

 795 #endif
 796 
 797   // Emitters for BigInteger.multiplyToLen intrinsic.
 798   inline void multiply64(Register dest_hi, Register dest_lo,
 799                          Register x, Register y);
 800   void add2_with_carry(Register dest_hi, Register dest_lo,
 801                        Register src1, Register src2);
 802   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
 803                              Register y, Register y_idx, Register z,
 804                              Register carry, Register product_high, Register product,
 805                              Register idx, Register kdx, Register tmp);
 806   void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
 807                               Register yz_idx, Register idx, Register carry,
 808                               Register product_high, Register product, Register tmp,
 809                               int offset);
 810   void multiply_128_x_128_loop(Register x_xstart,
 811                                Register y, Register z,
 812                                Register yz_idx, Register idx, Register carry,
 813                                Register product_high, Register product,
 814                                Register carry2, Register tmp);
 815   void muladd(Register out, Register in, Register offset, Register len, Register k,
 816               Register tmp1, Register tmp2, Register carry);
 817   void multiply_to_len(Register x, Register xlen,
 818                        Register y, Register ylen,
 819                        Register z, Register zlen,
 820                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
 821                        Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
 822                        Register tmp11, Register tmp12, Register tmp13);
 823 
 824   // Emitters for CRC32 calculation.
 825   // A note on invertCRC:
 826   //   Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
 827   //   CRC32 holds its current crc value in the externally visible representation.
 828   //   CRC32C holds its current crc value in internal format, ready for updating.
 829   //   Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
 830   //   In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
 831   //   The bool invertCRC parameter indicates whether bit-flipping is required before updates.
 832   void load_reverse_32(Register dst, Register src);
 833   int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
 834   void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
 835   void fold_8bit_crc32(Register crc, Register table, Register tmp);
 836   void update_byte_crc32(Register crc, Register val, Register table);

< prev index next >