...

Text file src/math/exp_amd64.s

Documentation: math

		 1// Copyright 2010 The Go Authors. All rights reserved.
		 2// Use of this source code is governed by a BSD-style
		 3// license that can be found in the LICENSE file.
		 4
		 5#include "textflag.h"
		 6
		 7// The method is based on a paper by Naoki Shibata: "Efficient evaluation
		 8// methods of elementary functions suitable for SIMD computation", Proc.
		 9// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
		10// (May 2010). The paper is available at
		11// https://link.springer.com/article/10.1007/s00450-010-0108-2
		12//
		13// The original code and the constants below are from the author's
		14// implementation available at http://freshmeat.net/projects/sleef.
		15// The README file says, "The software is in public domain.
		16// You can use the software without any obligation."
		17//
		18// This code is a simplified version of the original.
		19
		20#define LN2 0.6931471805599453094172321214581766 // log_e(2)
		21#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
		22#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
		23#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
		24#define PosInf 0x7FF0000000000000
		25#define NegInf 0xFFF0000000000000
		26#define Overflow 7.09782712893384e+02
		27
		28DATA exprodata<>+0(SB)/8, $0.5
		29DATA exprodata<>+8(SB)/8, $1.0
		30DATA exprodata<>+16(SB)/8, $2.0
		31DATA exprodata<>+24(SB)/8, $1.6666666666666666667e-1
		32DATA exprodata<>+32(SB)/8, $4.1666666666666666667e-2
		33DATA exprodata<>+40(SB)/8, $8.3333333333333333333e-3
		34DATA exprodata<>+48(SB)/8, $1.3888888888888888889e-3
		35DATA exprodata<>+56(SB)/8, $1.9841269841269841270e-4
		36DATA exprodata<>+64(SB)/8, $2.4801587301587301587e-5
		37GLOBL exprodata<>+0(SB), RODATA, $72
		38
		39// func Exp(x float64) float64
		40TEXT ·archExp(SB),NOSPLIT,$0
		41	// test bits for not-finite
		42	MOVQ		x+0(FP), BX
		43	MOVQ		$~(1<<63), AX // sign bit mask
		44	MOVQ		BX, DX
		45	ANDQ		AX, DX
		46	MOVQ		$PosInf, AX
		47	CMPQ		AX, DX
		48	JLE		 notFinite
		49	// check if argument will overflow
		50	MOVQ		BX, X0
		51	MOVSD	 $Overflow, X1
		52	COMISD	X1, X0
		53	JA			overflow
		54	MOVSD	 $LOG2E, X1
		55	MULSD	 X0, X1
		56	CVTSD2SL X1, BX // BX = exponent
		57	CVTSL2SD BX, X1
		58	CMPB ·useFMA(SB), $1
		59	JE	 avxfma
		60	MOVSD	 $LN2U, X2
		61	MULSD	 X1, X2
		62	SUBSD	 X2, X0
		63	MOVSD	 $LN2L, X2
		64	MULSD	 X1, X2
		65	SUBSD	 X2, X0
		66	// reduce argument
		67	MULSD	 $0.0625, X0
		68	// Taylor series evaluation
		69	MOVSD	 exprodata<>+64(SB), X1
		70	MULSD	 X0, X1
		71	ADDSD	 exprodata<>+56(SB), X1
		72	MULSD	 X0, X1
		73	ADDSD	 exprodata<>+48(SB), X1
		74	MULSD	 X0, X1
		75	ADDSD	 exprodata<>+40(SB), X1
		76	MULSD	 X0, X1
		77	ADDSD	 exprodata<>+32(SB), X1
		78	MULSD	 X0, X1
		79	ADDSD	 exprodata<>+24(SB), X1
		80	MULSD	 X0, X1
		81	ADDSD	 exprodata<>+0(SB), X1
		82	MULSD	 X0, X1
		83	ADDSD	 exprodata<>+8(SB), X1
		84	MULSD	 X1, X0
		85	MOVSD	 exprodata<>+16(SB), X1
		86	ADDSD	 X0, X1
		87	MULSD	 X1, X0
		88	MOVSD	 exprodata<>+16(SB), X1
		89	ADDSD	 X0, X1
		90	MULSD	 X1, X0
		91	MOVSD	 exprodata<>+16(SB), X1
		92	ADDSD	 X0, X1
		93	MULSD	 X1, X0
		94	MOVSD	 exprodata<>+16(SB), X1
		95	ADDSD	 X0, X1
		96	MULSD	 X1, X0
		97	ADDSD exprodata<>+8(SB), X0
		98	// return fr * 2**exponent
		99ldexp:
	 100	ADDL		$0x3FF, BX // add bias
	 101	JLE		 denormal
	 102	CMPL		BX, $0x7FF
	 103	JGE		 overflow
	 104lastStep:
	 105	SHLQ		$52, BX
	 106	MOVQ		BX, X1
	 107	MULSD	 X1, X0
	 108	MOVSD	 X0, ret+8(FP)
	 109	RET
	 110notFinite:
	 111	// test bits for -Inf
	 112	MOVQ		$NegInf, AX
	 113	CMPQ		AX, BX
	 114	JNE		 notNegInf
	 115	// -Inf, return 0
	 116underflow: // return 0
	 117	MOVQ		$0, ret+8(FP)
	 118	RET
	 119overflow: // return +Inf
	 120	MOVQ		$PosInf, BX
	 121notNegInf: // NaN or +Inf, return x
	 122	MOVQ		BX, ret+8(FP)
	 123	RET
	 124denormal:
	 125	CMPL		BX, $-52
	 126	JL			underflow
	 127	ADDL		$0x3FE, BX // add bias - 1
	 128	SHLQ		$52, BX
	 129	MOVQ		BX, X1
	 130	MULSD	 X1, X0
	 131	MOVQ		$1, BX
	 132	JMP		 lastStep
	 133
	 134avxfma:
	 135	MOVSD	 $LN2U, X2
	 136	VFNMADD231SD X2, X1, X0
	 137	MOVSD	 $LN2L, X2
	 138	VFNMADD231SD X2, X1, X0
	 139	// reduce argument
	 140	MULSD	 $0.0625, X0
	 141	// Taylor series evaluation
	 142	MOVSD	 exprodata<>+64(SB), X1
	 143	VFMADD213SD exprodata<>+56(SB), X0, X1
	 144	VFMADD213SD exprodata<>+48(SB), X0, X1
	 145	VFMADD213SD exprodata<>+40(SB), X0, X1
	 146	VFMADD213SD exprodata<>+32(SB), X0, X1
	 147	VFMADD213SD exprodata<>+24(SB), X0, X1
	 148	VFMADD213SD exprodata<>+0(SB), X0, X1
	 149	VFMADD213SD exprodata<>+8(SB), X0, X1
	 150	MULSD	 X1, X0
	 151	VADDSD exprodata<>+16(SB), X0, X1
	 152	MULSD	 X1, X0
	 153	VADDSD exprodata<>+16(SB), X0, X1
	 154	MULSD	 X1, X0
	 155	VADDSD exprodata<>+16(SB), X0, X1
	 156	MULSD	 X1, X0
	 157	VADDSD exprodata<>+16(SB), X0, X1
	 158	VFMADD213SD	 exprodata<>+8(SB), X1, X0
	 159	JMP ldexp

View as plain text