コード例 #1
0
ファイル: m128_test.go プロジェクト: klauspost/intrinsics
// exp2f4 approximates 2**x for the packed values in x.
//
// The input is first clamped to [-126.99999, 129]. It is then split into an
// integer part and a fractional part: 2**integer is assembled directly as a
// float bit pattern, 2**fraction comes from a minimax polynomial whose degree
// is selected by EXP_poly_DEGREE (2 through 5), and the two are multiplied.
// Any other EXP_poly_DEGREE value panics.
func exp2f4(x x86.M128) x86.M128 {
	// Clamp to the supported input range (upper bound first, then lower).
	clamped := sse.MaxPs(sse.MinPs(x, sse.Set1Ps(129)), sse.Set1Ps(-126.99999))

	// whole = int(clamped - 0.5)
	whole := sse2.CvtpsEpi32(sse.SubPs(clamped, sse.Set1Ps(0.5)))

	// frac = clamped - whole
	frac := sse.SubPs(clamped, sse2.Cvtepi32Ps(whole))

	// pow2Whole = (float) (1 << whole): add 127, shift left by 23 bits and
	// reinterpret the resulting integers as floats.
	biased := sse2.AddEpi32(whole, sse2.Set1Epi32(127))
	pow2Whole := sse2.Castsi128Ps(sse2.SlliEpi32(biased, 23))

	// Minimax polynomial fit of 2**x, in range [-0.5, 0.5[.
	var pow2Frac x86.M128
	switch EXP_poly_DEGREE {
	case 5:
		pow2Frac = poly5(frac, exp_p5_0, exp_p5_1, exp_p5_2, exp_p5_3, exp_p5_4, exp_p5_5)
	case 4:
		pow2Frac = poly4(frac, exp_p4_0, exp_p4_1, exp_p4_2, exp_p4_3, exp_p4_4)
	case 3:
		pow2Frac = poly3(frac, exp_p3_0, exp_p3_1, exp_p3_2, exp_p3_3)
	case 2:
		pow2Frac = poly2(frac, exp_p2_0, exp_p2_1, exp_p2_2)
	default:
		panic("invalid poly degree")
	}

	// 2**x = 2**whole * 2**frac
	return sse.MulPs(pow2Whole, pow2Frac)
}
コード例 #2
0
ファイル: m128_test.go プロジェクト: klauspost/intrinsics
// log2f4 approximates log2(x) for the packed values in x.
//
// The bits of x are masked with exp_mask and mantissa_mask to separate the
// exponent field from the mantissa field. The exponent field is shifted down
// by 23 bits, has 127 subtracted and is converted to float; the mantissa
// bits are OR-ed with the bit pattern of 1.0. A minimax polynomial selected by
// LOG_poly_DEGREE (3 through 6) is evaluated on that mantissa value,
// multiplied by (mantissa - 1), and added to the exponent. Any other
// LOG_poly_DEGREE value panics.
func log2f4(x x86.M128) x86.M128 {
	expBits := sse2.Set1Epi32(exp_mask)
	mantBits := sse2.Set1Epi32(mantissa_mask)
	unity := sse.Set1Ps(1.0)
	raw := sse2.CastpsSi128(x)

	// exponent = float((raw & exp_mask) >> 23 - 127)
	shifted := sse2.SrliEpi32(sse2.AndSi128(raw, expBits), 23)
	exponent := sse2.Cvtepi32Ps(sse2.SubEpi32(shifted, sse2.Set1Epi32(127)))

	// mantissa = (raw & mantissa_mask) | bits(1.0)
	mantissa := sse.OrPs(sse2.Castsi128Ps(sse2.AndSi128(raw, mantBits)), unity)

	// Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[.
	var fit x86.M128
	switch LOG_poly_DEGREE {
	case 6:
		fit = poly5(mantissa, log_p5_0, log_p5_1, log_p5_2, log_p5_3, log_p5_4, log_p5_5)
	case 5:
		fit = poly4(mantissa, log_p4_0, log_p4_1, log_p4_2, log_p4_3, log_p4_4)
	case 4:
		fit = poly3(mantissa, log_p3_0, log_p3_1, log_p3_2, log_p3_3)
	case 3:
		fit = poly2(mantissa, log_p2_0, log_p2_1, log_p2_2)
	default:
		panic("unsupported poly degree")
	}

	// Multiplying by (mantissa - 1) effectively raises the polynomial degree
	// by one, but ensures that log2(1) == 0.
	fit = sse.MulPs(fit, sse.SubPs(mantissa, unity))

	return sse.AddPs(fit, exponent)
}