5211 blt(CCR0, L_post_third_loop_done);
5212
5213 sldi(tmp, idx, LogBytesPerInt);
5214 lwzx(yz_idx, y, tmp);
5215 multiply64(product_high, product, x_xstart, yz_idx);
5216 lwzx(yz_idx, z, tmp);
5217
5218 add2_with_carry(product_high, product, yz_idx, carry);
5219
5220 sldi(tmp, idx, LogBytesPerInt);
5221 stwx(product, z, tmp);
5222 srdi(product, product, 32);
5223
5224 sldi(product_high, product_high, 32);
5225 orr(product, product, product_high);
5226 mr_if_needed(carry, product);
5227
5228 bind(L_post_third_loop_done);
5229 } // multiply_128_x_128_loop
5230
5231 void MacroAssembler::multiply_to_len(Register x, Register xlen,
5232 Register y, Register ylen,
5233 Register z, Register zlen,
5234 Register tmp1, Register tmp2,
5235 Register tmp3, Register tmp4,
5236 Register tmp5, Register tmp6,
5237 Register tmp7, Register tmp8,
5238 Register tmp9, Register tmp10,
5239 Register tmp11, Register tmp12,
5240 Register tmp13) {
5241
5242 ShortBranchVerifier sbv(this);
5243
5244 assert_different_registers(x, xlen, y, ylen, z, zlen,
5245 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
5246 assert_different_registers(x, xlen, y, ylen, z, zlen,
5247 tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
5248 assert_different_registers(x, xlen, y, ylen, z, zlen,
5249 tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);
5250
|
5211 blt(CCR0, L_post_third_loop_done);
5212
5213 sldi(tmp, idx, LogBytesPerInt);
5214 lwzx(yz_idx, y, tmp);
5215 multiply64(product_high, product, x_xstart, yz_idx);
5216 lwzx(yz_idx, z, tmp);
5217
5218 add2_with_carry(product_high, product, yz_idx, carry);
5219
5220 sldi(tmp, idx, LogBytesPerInt);
5221 stwx(product, z, tmp);
5222 srdi(product, product, 32);
5223
5224 sldi(product_high, product_high, 32);
5225 orr(product, product, product_high);
5226 mr_if_needed(carry, product);
5227
5228 bind(L_post_third_loop_done);
5229 } // multiply_128_x_128_loop
5230
// Emits a multiply-accumulate loop over 32-bit words:
//
//   for each of `len` words, walking both arrays from high address to low:
//     sum            = in[word] * k + out[word] + carry
//     out[word]      = low 32 bits of sum
//     carry          = sum >> 32
//
// Register operands:
//   out     base address of the accumulator array (read and written).
//   in      base address of the multiplicand array (read only).
//   offset  byte offset into `out`; the first word touched is at
//           out + offset - 4. Decremented by 4 up front and again per iteration.
//   len     number of words to process. If len <= 0 no loop is emitted-path taken
//           and only the setup below runs. Otherwise it is rewritten into a
//           byte offset into `in` and decremented by 4 per iteration.
//   k       multiplier, used as-is by mulld.
//   tmp1    scratch (loaded `in` word, then the product).
//   tmp2    scratch (loaded `out` word, then the running sum).
//   carry   output: zeroed on entry to the sequence, holds the final carry
//           (sum >> 32 of the last iteration) on exit.
//
// Clobbers: offset, len, tmp1, tmp2, carry, CCR0, and CTR (when len > 0).
//
// NOTE(review): presumably the code generator for a BigInteger-style mulAdd
// stub -- confirm against the caller.
// NOTE(review): the arithmetic is done in 64 bits; for the sum not to wrap,
// k and the loaded words are assumed to be zero-extended 32-bit values --
// confirm that the caller zero-extends k.
5231 void MacroAssembler::muladd(Register out, Register in,
5232 Register offset, Register len, Register k,
5233 Register tmp1, Register tmp2, Register carry) {
5234
5235 // LOOP: top of the per-word loop. SKIP: exit point, also the target when len <= 0.
5236 Label LOOP, SKIP;
5237
5238 // Compare len against zero. The result is consumed by the ble further down,
5239 cmpdi (CCR0, len, 0);
5240
5241 // Setup that runs on both paths (these sit between the compare and its branch,
// so they must leave CCR0 untouched).
5242 subi (offset, offset, 4);              // offset now addresses the first `out` word to process
5243 li (carry, 0);                         // no incoming carry; also the result when len <= 0
5244 ble (CCR0, SKIP);                      // nothing to do for len <= 0
5245
5246 mtctr (len);                           // CTR = iteration count (one iteration per word)
5247 subi (len, len, 1 );                   // len = index of the last `in` word ...
5248 sldi (len, len, 2 );                   // ... scaled by 4 to a byte offset
5249
5250 // Main loop: one word per iteration, moving toward lower addresses in both arrays.
5251 bind(LOOP);
5252 lwzx (tmp1, len, in );                 // tmp1 = word at in + len
5253 lwzx (tmp2, offset, out );             // tmp2 = word at out + offset
5254 mulld (tmp1, tmp1, k );                // tmp1 = tmp1 * k
5255 add (tmp2, carry, tmp2 );              // tmp2 += carry from the previous iteration
5256 add (tmp2, tmp1, tmp2 );               // tmp2 += product
5257 stwx (tmp2, offset, out );             // write the word back to out + offset
5258 srdi (carry, tmp2, 32 );               // carry = bits above the stored word
5259 subi (offset, offset, 4 );             // step `out` position down by one word
5260 subi (len, len, 4 );                   // step `in` position down by one word
5261 bdnz (LOOP);                           // loop while the CTR count is not exhausted
5262 bind(SKIP);
5263 }
5264
5265 void MacroAssembler::multiply_to_len(Register x, Register xlen,
5266 Register y, Register ylen,
5267 Register z, Register zlen,
5268 Register tmp1, Register tmp2,
5269 Register tmp3, Register tmp4,
5270 Register tmp5, Register tmp6,
5271 Register tmp7, Register tmp8,
5272 Register tmp9, Register tmp10,
5273 Register tmp11, Register tmp12,
5274 Register tmp13) {
5275
5276 ShortBranchVerifier sbv(this);
5277
5278 assert_different_registers(x, xlen, y, ylen, z, zlen,
5279 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
5280 assert_different_registers(x, xlen, y, ylen, z, zlen,
5281 tmp1, tmp2, tmp3, tmp4, tmp5, tmp7);
5282 assert_different_registers(x, xlen, y, ylen, z, zlen,
5283 tmp1, tmp2, tmp3, tmp4, tmp5, tmp8);
5284
|