...

Source file src/math/big/float.go

Documentation: math/big

		 1  // Copyright 2014 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // This file implements multi-precision floating-point numbers.
		 6  // Like in the GNU MPFR library (https://www.mpfr.org/), operands
		 7  // can be of mixed precision. Unlike MPFR, the rounding mode is
		 8  // not specified with each operation, but with each operand. The
		 9  // rounding mode of the result operand determines the rounding
		10  // mode of an operation. This is a from-scratch implementation.
		11  
		12  package big
		13  
		14  import (
		15  	"fmt"
		16  	"math"
		17  	"math/bits"
		18  )
		19  
		20  const debugFloat = false // enable for debugging
		21  
		22  // A nonzero finite Float represents a multi-precision floating point number
		23  //
		24  //	 sign × mantissa × 2**exponent
		25  //
		26  // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
		27  // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
		28  // All Floats are ordered, and the ordering of two Floats x and y
		29  // is defined by x.Cmp(y).
		30  //
		31  // Each Float value also has a precision, rounding mode, and accuracy.
		32  // The precision is the maximum number of mantissa bits available to
		33  // represent the value. The rounding mode specifies how a result should
		34  // be rounded to fit into the mantissa bits, and accuracy describes the
		35  // rounding error with respect to the exact result.
		36  //
		37  // Unless specified otherwise, all operations (including setters) that
		38  // specify a *Float variable for the result (usually via the receiver
		39  // with the exception of MantExp), round the numeric result according
		40  // to the precision and rounding mode of the result variable.
		41  //
		42  // If the provided result precision is 0 (see below), it is set to the
		43  // precision of the argument with the largest precision value before any
		44  // rounding takes place, and the rounding mode remains unchanged. Thus,
		45  // uninitialized Floats provided as result arguments will have their
		46  // precision set to a reasonable value determined by the operands, and
		47  // their mode is the zero value for RoundingMode (ToNearestEven).
		48  //
		49  // By setting the desired precision to 24 or 53 and using matching rounding
		50  // mode (typically ToNearestEven), Float operations produce the same results
		51  // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
		52  // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
		53  // Exponent underflow and overflow lead to a 0 or an Infinity for different
		54  // values than IEEE-754 because Float exponents have a much larger range.
		55  //
		56  // The zero (uninitialized) value for a Float is ready to use and represents
		57  // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
		58  //
		59  // Operations always take pointer arguments (*Float) rather
		60  // than Float values, and each unique Float value requires
		61  // its own unique *Float pointer. To "copy" a Float value,
		62  // an existing (or newly allocated) Float must be set to
		63  // a new value using the Float.Set method; shallow copies
		64  // of Floats are not supported and may lead to errors.
		65  type Float struct {
		66  	prec uint32
		67  	mode RoundingMode
		68  	acc	Accuracy
		69  	form form
		70  	neg	bool
		71  	mant nat
		72  	exp	int32
		73  }
		74  
		75  // An ErrNaN panic is raised by a Float operation that would lead to
		76  // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
		77  type ErrNaN struct {
		78  	msg string
		79  }
		80  
		81  func (err ErrNaN) Error() string {
		82  	return err.msg
		83  }
		84  
		85  // NewFloat allocates and returns a new Float set to x,
		86  // with precision 53 and rounding mode ToNearestEven.
		87  // NewFloat panics with ErrNaN if x is a NaN.
		88  func NewFloat(x float64) *Float {
		89  	if math.IsNaN(x) {
		90  		panic(ErrNaN{"NewFloat(NaN)"})
		91  	}
		92  	return new(Float).SetFloat64(x)
		93  }
		94  
		95  // Exponent and precision limits.
		96  const (
		97  	MaxExp	= math.MaxInt32	// largest supported exponent
		98  	MinExp	= math.MinInt32	// smallest supported exponent
		99  	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
	 100  )
	 101  
	 102  // Internal representation: The mantissa bits x.mant of a nonzero finite
	 103  // Float x are stored in a nat slice long enough to hold up to x.prec bits;
	 104  // the slice may (but doesn't have to) be shorter if the mantissa contains
	 105  // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
	 106  // the msb is shifted all the way "to the left"). Thus, if the mantissa has
	 107  // trailing 0 bits or x.prec is not a multiple of the Word size _W,
	 108  // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
	 109  // to the value 0.5; the exponent x.exp shifts the binary point as needed.
	 110  //
	 111  // A zero or non-finite Float x ignores x.mant and x.exp.
	 112  //
	 113  // x								 form			neg			mant				 exp
	 114  // ----------------------------------------------------------
	 115  // ±0								zero			sign		 -						-
	 116  // 0 < |x| < +Inf		finite		sign		 mantissa		 exponent
	 117  // ±Inf							inf			 sign		 -						-
	 118  
	 119  // A form value describes the internal representation.
	 120  type form byte
	 121  
	 122  // The form value order is relevant - do not change!
	 123  const (
	 124  	zero form = iota
	 125  	finite
	 126  	inf
	 127  )
	 128  
	 129  // RoundingMode determines how a Float value is rounded to the
	 130  // desired precision. Rounding may change the Float value; the
	 131  // rounding error is described by the Float's Accuracy.
	 132  type RoundingMode byte
	 133  
	 134  // These constants define supported rounding modes.
	 135  const (
	 136  	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
	 137  	ToNearestAway										 // == IEEE 754-2008 roundTiesToAway
	 138  	ToZero														// == IEEE 754-2008 roundTowardZero
	 139  	AwayFromZero											// no IEEE 754-2008 equivalent
	 140  	ToNegativeInf										 // == IEEE 754-2008 roundTowardNegative
	 141  	ToPositiveInf										 // == IEEE 754-2008 roundTowardPositive
	 142  )
	 143  
	 144  //go:generate stringer -type=RoundingMode
	 145  
	 146  // Accuracy describes the rounding error produced by the most recent
	 147  // operation that generated a Float value, relative to the exact value.
	 148  type Accuracy int8
	 149  
	 150  // Constants describing the Accuracy of a Float.
	 151  const (
	 152  	Below Accuracy = -1
	 153  	Exact Accuracy = 0
	 154  	Above Accuracy = +1
	 155  )
	 156  
	 157  //go:generate stringer -type=Accuracy
	 158  
	 159  // SetPrec sets z's precision to prec and returns the (possibly) rounded
	 160  // value of z. Rounding occurs according to z's rounding mode if the mantissa
	 161  // cannot be represented in prec bits without loss of precision.
	 162  // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
	 163  // If prec > MaxPrec, it is set to MaxPrec.
	 164  func (z *Float) SetPrec(prec uint) *Float {
	 165  	z.acc = Exact // optimistically assume no rounding is needed
	 166  
	 167  	// special case
	 168  	if prec == 0 {
	 169  		z.prec = 0
	 170  		if z.form == finite {
	 171  			// truncate z to 0
	 172  			z.acc = makeAcc(z.neg)
	 173  			z.form = zero
	 174  		}
	 175  		return z
	 176  	}
	 177  
	 178  	// general case
	 179  	if prec > MaxPrec {
	 180  		prec = MaxPrec
	 181  	}
	 182  	old := z.prec
	 183  	z.prec = uint32(prec)
	 184  	if z.prec < old {
	 185  		z.round(0)
	 186  	}
	 187  	return z
	 188  }
	 189  
	 190  func makeAcc(above bool) Accuracy {
	 191  	if above {
	 192  		return Above
	 193  	}
	 194  	return Below
	 195  }
	 196  
	 197  // SetMode sets z's rounding mode to mode and returns an exact z.
	 198  // z remains unchanged otherwise.
	 199  // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
	 200  func (z *Float) SetMode(mode RoundingMode) *Float {
	 201  	z.mode = mode
	 202  	z.acc = Exact
	 203  	return z
	 204  }
	 205  
	 206  // Prec returns the mantissa precision of x in bits.
	 207  // The result may be 0 for |x| == 0 and |x| == Inf.
	 208  func (x *Float) Prec() uint {
	 209  	return uint(x.prec)
	 210  }
	 211  
	 212  // MinPrec returns the minimum precision required to represent x exactly
	 213  // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
	 214  // The result is 0 for |x| == 0 and |x| == Inf.
	 215  func (x *Float) MinPrec() uint {
	 216  	if x.form != finite {
	 217  		return 0
	 218  	}
	 219  	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
	 220  }
	 221  
	 222  // Mode returns the rounding mode of x.
	 223  func (x *Float) Mode() RoundingMode {
	 224  	return x.mode
	 225  }
	 226  
	 227  // Acc returns the accuracy of x produced by the most recent
	 228  // operation, unless explicitly documented otherwise by that
	 229  // operation.
	 230  func (x *Float) Acc() Accuracy {
	 231  	return x.acc
	 232  }
	 233  
	 234  // Sign returns:
	 235  //
	 236  //	-1 if x <	 0
	 237  //	 0 if x is ±0
	 238  //	+1 if x >	 0
	 239  //
	 240  func (x *Float) Sign() int {
	 241  	if debugFloat {
	 242  		x.validate()
	 243  	}
	 244  	if x.form == zero {
	 245  		return 0
	 246  	}
	 247  	if x.neg {
	 248  		return -1
	 249  	}
	 250  	return 1
	 251  }
	 252  
	 253  // MantExp breaks x into its mantissa and exponent components
	 254  // and returns the exponent. If a non-nil mant argument is
	 255  // provided its value is set to the mantissa of x, with the
	 256  // same precision and rounding mode as x. The components
	 257  // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
	 258  // Calling MantExp with a nil argument is an efficient way to
	 259  // get the exponent of the receiver.
	 260  //
	 261  // Special cases are:
	 262  //
	 263  //	(	±0).MantExp(mant) = 0, with mant set to	 ±0
	 264  //	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
	 265  //
	 266  // x and mant may be the same in which case x is set to its
	 267  // mantissa value.
	 268  func (x *Float) MantExp(mant *Float) (exp int) {
	 269  	if debugFloat {
	 270  		x.validate()
	 271  	}
	 272  	if x.form == finite {
	 273  		exp = int(x.exp)
	 274  	}
	 275  	if mant != nil {
	 276  		mant.Copy(x)
	 277  		if mant.form == finite {
	 278  			mant.exp = 0
	 279  		}
	 280  	}
	 281  	return
	 282  }
	 283  
	 284  func (z *Float) setExpAndRound(exp int64, sbit uint) {
	 285  	if exp < MinExp {
	 286  		// underflow
	 287  		z.acc = makeAcc(z.neg)
	 288  		z.form = zero
	 289  		return
	 290  	}
	 291  
	 292  	if exp > MaxExp {
	 293  		// overflow
	 294  		z.acc = makeAcc(!z.neg)
	 295  		z.form = inf
	 296  		return
	 297  	}
	 298  
	 299  	z.form = finite
	 300  	z.exp = int32(exp)
	 301  	z.round(sbit)
	 302  }
	 303  
	 304  // SetMantExp sets z to mant × 2**exp and returns z.
	 305  // The result z has the same precision and rounding mode
	 306  // as mant. SetMantExp is an inverse of MantExp but does
	 307  // not require 0.5 <= |mant| < 1.0. Specifically:
	 308  //
	 309  //	mant := new(Float)
	 310  //	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
	 311  //
	 312  // Special cases are:
	 313  //
	 314  //	z.SetMantExp(	±0, exp) =	 ±0
	 315  //	z.SetMantExp(±Inf, exp) = ±Inf
	 316  //
	 317  // z and mant may be the same in which case z's exponent
	 318  // is set to exp.
	 319  func (z *Float) SetMantExp(mant *Float, exp int) *Float {
	 320  	if debugFloat {
	 321  		z.validate()
	 322  		mant.validate()
	 323  	}
	 324  	z.Copy(mant)
	 325  
	 326  	if z.form == finite {
	 327  		// 0 < |mant| < +Inf
	 328  		z.setExpAndRound(int64(z.exp)+int64(exp), 0)
	 329  	}
	 330  	return z
	 331  }
	 332  
	 333  // Signbit reports whether x is negative or negative zero.
	 334  func (x *Float) Signbit() bool {
	 335  	return x.neg
	 336  }
	 337  
	 338  // IsInf reports whether x is +Inf or -Inf.
	 339  func (x *Float) IsInf() bool {
	 340  	return x.form == inf
	 341  }
	 342  
	 343  // IsInt reports whether x is an integer.
	 344  // ±Inf values are not integers.
	 345  func (x *Float) IsInt() bool {
	 346  	if debugFloat {
	 347  		x.validate()
	 348  	}
	 349  	// special cases
	 350  	if x.form != finite {
	 351  		return x.form == zero
	 352  	}
	 353  	// x.form == finite
	 354  	if x.exp <= 0 {
	 355  		return false
	 356  	}
	 357  	// x.exp > 0
	 358  	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
	 359  }
	 360  
	 361  // debugging support
	 362  func (x *Float) validate() {
	 363  	if !debugFloat {
	 364  		// avoid performance bugs
	 365  		panic("validate called but debugFloat is not set")
	 366  	}
	 367  	if x.form != finite {
	 368  		return
	 369  	}
	 370  	m := len(x.mant)
	 371  	if m == 0 {
	 372  		panic("nonzero finite number with empty mantissa")
	 373  	}
	 374  	const msb = 1 << (_W - 1)
	 375  	if x.mant[m-1]&msb == 0 {
	 376  		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
	 377  	}
	 378  	if x.prec == 0 {
	 379  		panic("zero precision finite number")
	 380  	}
	 381  }
	 382  
	 383  // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
	 384  // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
	 385  // have before calling round. z's mantissa must be normalized (with the msb set)
	 386  // or empty.
	 387  //
	 388  // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
	 389  // sign of z. For correct rounding, the sign of z must be set correctly before
	 390  // calling round.
	 391  func (z *Float) round(sbit uint) {
	 392  	if debugFloat {
	 393  		z.validate()
	 394  	}
	 395  
	 396  	z.acc = Exact
	 397  	if z.form != finite {
	 398  		// ±0 or ±Inf => nothing left to do
	 399  		return
	 400  	}
	 401  	// z.form == finite && len(z.mant) > 0
	 402  	// m > 0 implies z.prec > 0 (checked by validate)
	 403  
	 404  	m := uint32(len(z.mant)) // present mantissa length in words
	 405  	bits := m * _W					 // present mantissa bits; bits > 0
	 406  	if bits <= z.prec {
	 407  		// mantissa fits => nothing to do
	 408  		return
	 409  	}
	 410  	// bits > z.prec
	 411  
	 412  	// Rounding is based on two bits: the rounding bit (rbit) and the
	 413  	// sticky bit (sbit). The rbit is the bit immediately before the
	 414  	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
	 415  	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
	 416  	//
	 417  	//	 rbit	sbit	=> "fractional part"
	 418  	//
	 419  	//	 0		 0				== 0
	 420  	//	 0		 1				>	0	, < 0.5
	 421  	//	 1		 0				== 0.5
	 422  	//	 1		 1				>	0.5, < 1.0
	 423  
	 424  	// bits > z.prec: mantissa too large => round
	 425  	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
	 426  	rbit := z.mant.bit(r) & 1		// rounding bit; be safe and ensure it's a single bit
	 427  	// The sticky bit is only needed for rounding ToNearestEven
	 428  	// or when the rounding bit is zero. Avoid computation otherwise.
	 429  	if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) {
	 430  		sbit = z.mant.sticky(r)
	 431  	}
	 432  	sbit &= 1 // be safe and ensure it's a single bit
	 433  
	 434  	// cut off extra words
	 435  	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
	 436  	if m > n {
	 437  		copy(z.mant, z.mant[m-n:]) // move n last words to front
	 438  		z.mant = z.mant[:n]
	 439  	}
	 440  
	 441  	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
	 442  	ntz := n*_W - z.prec // 0 <= ntz < _W
	 443  	lsb := Word(1) << ntz
	 444  
	 445  	// round if result is inexact
	 446  	if rbit|sbit != 0 {
	 447  		// Make rounding decision: The result mantissa is truncated ("rounded down")
	 448  		// by default. Decide if we need to increment, or "round up", the (unsigned)
	 449  		// mantissa.
	 450  		inc := false
	 451  		switch z.mode {
	 452  		case ToNegativeInf:
	 453  			inc = z.neg
	 454  		case ToZero:
	 455  			// nothing to do
	 456  		case ToNearestEven:
	 457  			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
	 458  		case ToNearestAway:
	 459  			inc = rbit != 0
	 460  		case AwayFromZero:
	 461  			inc = true
	 462  		case ToPositiveInf:
	 463  			inc = !z.neg
	 464  		default:
	 465  			panic("unreachable")
	 466  		}
	 467  
	 468  		// A positive result (!z.neg) is Above the exact result if we increment,
	 469  		// and it's Below if we truncate (Exact results require no rounding).
	 470  		// For a negative result (z.neg) it is exactly the opposite.
	 471  		z.acc = makeAcc(inc != z.neg)
	 472  
	 473  		if inc {
	 474  			// add 1 to mantissa
	 475  			if addVW(z.mant, z.mant, lsb) != 0 {
	 476  				// mantissa overflow => adjust exponent
	 477  				if z.exp >= MaxExp {
	 478  					// exponent overflow
	 479  					z.form = inf
	 480  					return
	 481  				}
	 482  				z.exp++
	 483  				// adjust mantissa: divide by 2 to compensate for exponent adjustment
	 484  				shrVU(z.mant, z.mant, 1)
	 485  				// set msb == carry == 1 from the mantissa overflow above
	 486  				const msb = 1 << (_W - 1)
	 487  				z.mant[n-1] |= msb
	 488  			}
	 489  		}
	 490  	}
	 491  
	 492  	// zero out trailing bits in least-significant word
	 493  	z.mant[0] &^= lsb - 1
	 494  
	 495  	if debugFloat {
	 496  		z.validate()
	 497  	}
	 498  }
	 499  
	 500  func (z *Float) setBits64(neg bool, x uint64) *Float {
	 501  	if z.prec == 0 {
	 502  		z.prec = 64
	 503  	}
	 504  	z.acc = Exact
	 505  	z.neg = neg
	 506  	if x == 0 {
	 507  		z.form = zero
	 508  		return z
	 509  	}
	 510  	// x != 0
	 511  	z.form = finite
	 512  	s := bits.LeadingZeros64(x)
	 513  	z.mant = z.mant.setUint64(x << uint(s))
	 514  	z.exp = int32(64 - s) // always fits
	 515  	if z.prec < 64 {
	 516  		z.round(0)
	 517  	}
	 518  	return z
	 519  }
	 520  
	 521  // SetUint64 sets z to the (possibly rounded) value of x and returns z.
	 522  // If z's precision is 0, it is changed to 64 (and rounding will have
	 523  // no effect).
	 524  func (z *Float) SetUint64(x uint64) *Float {
	 525  	return z.setBits64(false, x)
	 526  }
	 527  
	 528  // SetInt64 sets z to the (possibly rounded) value of x and returns z.
	 529  // If z's precision is 0, it is changed to 64 (and rounding will have
	 530  // no effect).
	 531  func (z *Float) SetInt64(x int64) *Float {
	 532  	u := x
	 533  	if u < 0 {
	 534  		u = -u
	 535  	}
	 536  	// We cannot simply call z.SetUint64(uint64(u)) and change
	 537  	// the sign afterwards because the sign affects rounding.
	 538  	return z.setBits64(x < 0, uint64(u))
	 539  }
	 540  
	 541  // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
	 542  // If z's precision is 0, it is changed to 53 (and rounding will have
	 543  // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
	 544  func (z *Float) SetFloat64(x float64) *Float {
	 545  	if z.prec == 0 {
	 546  		z.prec = 53
	 547  	}
	 548  	if math.IsNaN(x) {
	 549  		panic(ErrNaN{"Float.SetFloat64(NaN)"})
	 550  	}
	 551  	z.acc = Exact
	 552  	z.neg = math.Signbit(x) // handle -0, -Inf correctly
	 553  	if x == 0 {
	 554  		z.form = zero
	 555  		return z
	 556  	}
	 557  	if math.IsInf(x, 0) {
	 558  		z.form = inf
	 559  		return z
	 560  	}
	 561  	// normalized x != 0
	 562  	z.form = finite
	 563  	fmant, exp := math.Frexp(x) // get normalized mantissa
	 564  	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
	 565  	z.exp = int32(exp) // always fits
	 566  	if z.prec < 53 {
	 567  		z.round(0)
	 568  	}
	 569  	return z
	 570  }
	 571  
	 572  // fnorm normalizes mantissa m by shifting it to the left
	 573  // such that the msb of the most-significant word (msw) is 1.
	 574  // It returns the shift amount. It assumes that len(m) != 0.
	 575  func fnorm(m nat) int64 {
	 576  	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
	 577  		panic("msw of mantissa is 0")
	 578  	}
	 579  	s := nlz(m[len(m)-1])
	 580  	if s > 0 {
	 581  		c := shlVU(m, m, s)
	 582  		if debugFloat && c != 0 {
	 583  			panic("nlz or shlVU incorrect")
	 584  		}
	 585  	}
	 586  	return int64(s)
	 587  }
	 588  
	 589  // SetInt sets z to the (possibly rounded) value of x and returns z.
	 590  // If z's precision is 0, it is changed to the larger of x.BitLen()
	 591  // or 64 (and rounding will have no effect).
	 592  func (z *Float) SetInt(x *Int) *Float {
	 593  	// TODO(gri) can be more efficient if z.prec > 0
	 594  	// but small compared to the size of x, or if there
	 595  	// are many trailing 0's.
	 596  	bits := uint32(x.BitLen())
	 597  	if z.prec == 0 {
	 598  		z.prec = umax32(bits, 64)
	 599  	}
	 600  	z.acc = Exact
	 601  	z.neg = x.neg
	 602  	if len(x.abs) == 0 {
	 603  		z.form = zero
	 604  		return z
	 605  	}
	 606  	// x != 0
	 607  	z.mant = z.mant.set(x.abs)
	 608  	fnorm(z.mant)
	 609  	z.setExpAndRound(int64(bits), 0)
	 610  	return z
	 611  }
	 612  
	 613  // SetRat sets z to the (possibly rounded) value of x and returns z.
	 614  // If z's precision is 0, it is changed to the largest of a.BitLen(),
	 615  // b.BitLen(), or 64; with x = a/b.
	 616  func (z *Float) SetRat(x *Rat) *Float {
	 617  	if x.IsInt() {
	 618  		return z.SetInt(x.Num())
	 619  	}
	 620  	var a, b Float
	 621  	a.SetInt(x.Num())
	 622  	b.SetInt(x.Denom())
	 623  	if z.prec == 0 {
	 624  		z.prec = umax32(a.prec, b.prec)
	 625  	}
	 626  	return z.Quo(&a, &b)
	 627  }
	 628  
	 629  // SetInf sets z to the infinite Float -Inf if signbit is
	 630  // set, or +Inf if signbit is not set, and returns z. The
	 631  // precision of z is unchanged and the result is always
	 632  // Exact.
	 633  func (z *Float) SetInf(signbit bool) *Float {
	 634  	z.acc = Exact
	 635  	z.form = inf
	 636  	z.neg = signbit
	 637  	return z
	 638  }
	 639  
	 640  // Set sets z to the (possibly rounded) value of x and returns z.
	 641  // If z's precision is 0, it is changed to the precision of x
	 642  // before setting z (and rounding will have no effect).
	 643  // Rounding is performed according to z's precision and rounding
	 644  // mode; and z's accuracy reports the result error relative to the
	 645  // exact (not rounded) result.
	 646  func (z *Float) Set(x *Float) *Float {
	 647  	if debugFloat {
	 648  		x.validate()
	 649  	}
	 650  	z.acc = Exact
	 651  	if z != x {
	 652  		z.form = x.form
	 653  		z.neg = x.neg
	 654  		if x.form == finite {
	 655  			z.exp = x.exp
	 656  			z.mant = z.mant.set(x.mant)
	 657  		}
	 658  		if z.prec == 0 {
	 659  			z.prec = x.prec
	 660  		} else if z.prec < x.prec {
	 661  			z.round(0)
	 662  		}
	 663  	}
	 664  	return z
	 665  }
	 666  
	 667  // Copy sets z to x, with the same precision, rounding mode, and
	 668  // accuracy as x, and returns z. x is not changed even if z and
	 669  // x are the same.
	 670  func (z *Float) Copy(x *Float) *Float {
	 671  	if debugFloat {
	 672  		x.validate()
	 673  	}
	 674  	if z != x {
	 675  		z.prec = x.prec
	 676  		z.mode = x.mode
	 677  		z.acc = x.acc
	 678  		z.form = x.form
	 679  		z.neg = x.neg
	 680  		if z.form == finite {
	 681  			z.mant = z.mant.set(x.mant)
	 682  			z.exp = x.exp
	 683  		}
	 684  	}
	 685  	return z
	 686  }
	 687  
	 688  // msb32 returns the 32 most significant bits of x.
	 689  func msb32(x nat) uint32 {
	 690  	i := len(x) - 1
	 691  	if i < 0 {
	 692  		return 0
	 693  	}
	 694  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
	 695  		panic("x not normalized")
	 696  	}
	 697  	switch _W {
	 698  	case 32:
	 699  		return uint32(x[i])
	 700  	case 64:
	 701  		return uint32(x[i] >> 32)
	 702  	}
	 703  	panic("unreachable")
	 704  }
	 705  
	 706  // msb64 returns the 64 most significant bits of x.
	 707  func msb64(x nat) uint64 {
	 708  	i := len(x) - 1
	 709  	if i < 0 {
	 710  		return 0
	 711  	}
	 712  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
	 713  		panic("x not normalized")
	 714  	}
	 715  	switch _W {
	 716  	case 32:
	 717  		v := uint64(x[i]) << 32
	 718  		if i > 0 {
	 719  			v |= uint64(x[i-1])
	 720  		}
	 721  		return v
	 722  	case 64:
	 723  		return uint64(x[i])
	 724  	}
	 725  	panic("unreachable")
	 726  }
	 727  
	 728  // Uint64 returns the unsigned integer resulting from truncating x
	 729  // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
	 730  // if x is an integer and Below otherwise.
	 731  // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
	 732  // for x > math.MaxUint64.
	 733  func (x *Float) Uint64() (uint64, Accuracy) {
	 734  	if debugFloat {
	 735  		x.validate()
	 736  	}
	 737  
	 738  	switch x.form {
	 739  	case finite:
	 740  		if x.neg {
	 741  			return 0, Above
	 742  		}
	 743  		// 0 < x < +Inf
	 744  		if x.exp <= 0 {
	 745  			// 0 < x < 1
	 746  			return 0, Below
	 747  		}
	 748  		// 1 <= x < Inf
	 749  		if x.exp <= 64 {
	 750  			// u = trunc(x) fits into a uint64
	 751  			u := msb64(x.mant) >> (64 - uint32(x.exp))
	 752  			if x.MinPrec() <= 64 {
	 753  				return u, Exact
	 754  			}
	 755  			return u, Below // x truncated
	 756  		}
	 757  		// x too large
	 758  		return math.MaxUint64, Below
	 759  
	 760  	case zero:
	 761  		return 0, Exact
	 762  
	 763  	case inf:
	 764  		if x.neg {
	 765  			return 0, Above
	 766  		}
	 767  		return math.MaxUint64, Below
	 768  	}
	 769  
	 770  	panic("unreachable")
	 771  }
	 772  
	 773  // Int64 returns the integer resulting from truncating x towards zero.
	 774  // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
	 775  // an integer, and Above (x < 0) or Below (x > 0) otherwise.
	 776  // The result is (math.MinInt64, Above) for x < math.MinInt64,
	 777  // and (math.MaxInt64, Below) for x > math.MaxInt64.
	 778  func (x *Float) Int64() (int64, Accuracy) {
	 779  	if debugFloat {
	 780  		x.validate()
	 781  	}
	 782  
	 783  	switch x.form {
	 784  	case finite:
	 785  		// 0 < |x| < +Inf
	 786  		acc := makeAcc(x.neg)
	 787  		if x.exp <= 0 {
	 788  			// 0 < |x| < 1
	 789  			return 0, acc
	 790  		}
	 791  		// x.exp > 0
	 792  
	 793  		// 1 <= |x| < +Inf
	 794  		if x.exp <= 63 {
	 795  			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
	 796  			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
	 797  			if x.neg {
	 798  				i = -i
	 799  			}
	 800  			if x.MinPrec() <= uint(x.exp) {
	 801  				return i, Exact
	 802  			}
	 803  			return i, acc // x truncated
	 804  		}
	 805  		if x.neg {
	 806  			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
	 807  			if x.exp == 64 && x.MinPrec() == 1 {
	 808  				acc = Exact
	 809  			}
	 810  			return math.MinInt64, acc
	 811  		}
	 812  		// x too large
	 813  		return math.MaxInt64, Below
	 814  
	 815  	case zero:
	 816  		return 0, Exact
	 817  
	 818  	case inf:
	 819  		if x.neg {
	 820  			return math.MinInt64, Above
	 821  		}
	 822  		return math.MaxInt64, Below
	 823  	}
	 824  
	 825  	panic("unreachable")
	 826  }
	 827  
	 828  // Float32 returns the float32 value nearest to x. If x is too small to be
	 829  // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
	 830  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
	 831  // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
	 832  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
	 833  func (x *Float) Float32() (float32, Accuracy) {
	 834  	if debugFloat {
	 835  		x.validate()
	 836  	}
	 837  
	 838  	switch x.form {
	 839  	case finite:
	 840  		// 0 < |x| < +Inf
	 841  
	 842  		const (
	 843  			fbits = 32								//				float size
	 844  			mbits = 23								//				mantissa size (excluding implicit msb)
	 845  			ebits = fbits - mbits - 1 //		 8	exponent size
	 846  			bias	= 1<<(ebits-1) - 1	//	 127	exponent bias
	 847  			dmin	= 1 - bias - mbits	//	-149	smallest unbiased exponent (denormal)
	 848  			emin	= 1 - bias					//	-126	smallest unbiased exponent (normal)
	 849  			emax	= bias							//	 127	largest unbiased exponent (normal)
	 850  		)
	 851  
	 852  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
	 853  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
	 854  
	 855  		// Compute precision p for float32 mantissa.
	 856  		// If the exponent is too small, we have a denormal number before
	 857  		// rounding and fewer than p mantissa bits of precision available
	 858  		// (the exponent remains fixed but the mantissa gets shifted right).
	 859  		p := mbits + 1 // precision of normal float
	 860  		if e < emin {
	 861  			// recompute precision
	 862  			p = mbits + 1 - emin + int(e)
	 863  			// If p == 0, the mantissa of x is shifted so much to the right
	 864  			// that its msb falls immediately to the right of the float32
	 865  			// mantissa space. In other words, if the smallest denormal is
	 866  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
	 867  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
	 868  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
	 869  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
	 870  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
	 871  				// underflow to ±0
	 872  				if x.neg {
	 873  					var z float32
	 874  					return -z, Above
	 875  				}
	 876  				return 0.0, Below
	 877  			}
	 878  			// otherwise, round up
	 879  			// We handle p == 0 explicitly because it's easy and because
	 880  			// Float.round doesn't support rounding to 0 bits of precision.
	 881  			if p == 0 {
	 882  				if x.neg {
	 883  					return -math.SmallestNonzeroFloat32, Below
	 884  				}
	 885  				return math.SmallestNonzeroFloat32, Above
	 886  			}
	 887  		}
	 888  		// p > 0
	 889  
	 890  		// round
	 891  		var r Float
	 892  		r.prec = uint32(p)
	 893  		r.Set(x)
	 894  		e = r.exp - 1
	 895  
	 896  		// Rounding may have caused r to overflow to ±Inf
	 897  		// (rounding never causes underflows to 0).
	 898  		// If the exponent is too large, also overflow to ±Inf.
	 899  		if r.form == inf || e > emax {
	 900  			// overflow
	 901  			if x.neg {
	 902  				return float32(math.Inf(-1)), Below
	 903  			}
	 904  			return float32(math.Inf(+1)), Above
	 905  		}
	 906  		// e <= emax
	 907  
	 908  		// Determine sign, biased exponent, and mantissa.
	 909  		var sign, bexp, mant uint32
	 910  		if x.neg {
	 911  			sign = 1 << (fbits - 1)
	 912  		}
	 913  
	 914  		// Rounding may have caused a denormal number to
	 915  		// become normal. Check again.
	 916  		if e < emin {
	 917  			// denormal number: recompute precision
	 918  			// Since rounding may have at best increased precision
	 919  			// and we have eliminated p <= 0 early, we know p > 0.
	 920  			// bexp == 0 for denormals
	 921  			p = mbits + 1 - emin + int(e)
	 922  			mant = msb32(r.mant) >> uint(fbits-p)
	 923  		} else {
	 924  			// normal number: emin <= e <= emax
	 925  			bexp = uint32(e+bias) << mbits
	 926  			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
	 927  		}
	 928  
	 929  		return math.Float32frombits(sign | bexp | mant), r.acc
	 930  
	 931  	case zero:
	 932  		if x.neg {
	 933  			var z float32
	 934  			return -z, Exact
	 935  		}
	 936  		return 0.0, Exact
	 937  
	 938  	case inf:
	 939  		if x.neg {
	 940  			return float32(math.Inf(-1)), Exact
	 941  		}
	 942  		return float32(math.Inf(+1)), Exact
	 943  	}
	 944  
	 945  	panic("unreachable")
	 946  }
	 947  
	 948  // Float64 returns the float64 value nearest to x. If x is too small to be
	 949  // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
	 950  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
	 951  // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
	 952  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
	 953  func (x *Float) Float64() (float64, Accuracy) {
	 954  	if debugFloat {
	 955  		x.validate()
	 956  	}
	 957  
	 958  	switch x.form {
	 959  	case finite:
	 960  		// 0 < |x| < +Inf
	 961  
	 962  		const (
	 963  			fbits = 64								//				float size
	 964  			mbits = 52								//				mantissa size (excluding implicit msb)
	 965  			ebits = fbits - mbits - 1 //		11	exponent size
	 966  			bias	= 1<<(ebits-1) - 1	//	1023	exponent bias
	 967  			dmin	= 1 - bias - mbits	// -1074	smallest unbiased exponent (denormal)
	 968  			emin	= 1 - bias					// -1022	smallest unbiased exponent (normal)
	 969  			emax	= bias							//	1023	largest unbiased exponent (normal)
	 970  		)
	 971  
	 972  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
	 973  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
	 974  
	 975  		// Compute precision p for float64 mantissa.
	 976  		// If the exponent is too small, we have a denormal number before
	 977  		// rounding and fewer than p mantissa bits of precision available
	 978  		// (the exponent remains fixed but the mantissa gets shifted right).
	 979  		p := mbits + 1 // precision of normal float
	 980  		if e < emin {
	 981  			// recompute precision
	 982  			p = mbits + 1 - emin + int(e)
	 983  			// If p == 0, the mantissa of x is shifted so much to the right
	 984  			// that its msb falls immediately to the right of the float64
	 985  			// mantissa space. In other words, if the smallest denormal is
	 986  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
	 987  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
	 988  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
	 989  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
	 990  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
	 991  				// underflow to ±0
	 992  				if x.neg {
	 993  					var z float64
	 994  					return -z, Above
	 995  				}
	 996  				return 0.0, Below
	 997  			}
	 998  			// otherwise, round up
	 999  			// We handle p == 0 explicitly because it's easy and because
	1000  			// Float.round doesn't support rounding to 0 bits of precision.
	1001  			if p == 0 {
	1002  				if x.neg {
	1003  					return -math.SmallestNonzeroFloat64, Below
	1004  				}
	1005  				return math.SmallestNonzeroFloat64, Above
	1006  			}
	1007  		}
	1008  		// p > 0
	1009  
	1010  		// round
	1011  		var r Float
	1012  		r.prec = uint32(p)
	1013  		r.Set(x)
	1014  		e = r.exp - 1
	1015  
	1016  		// Rounding may have caused r to overflow to ±Inf
	1017  		// (rounding never causes underflows to 0).
	1018  		// If the exponent is too large, also overflow to ±Inf.
	1019  		if r.form == inf || e > emax {
	1020  			// overflow
	1021  			if x.neg {
	1022  				return math.Inf(-1), Below
	1023  			}
	1024  			return math.Inf(+1), Above
	1025  		}
	1026  		// e <= emax
	1027  
	1028  		// Determine sign, biased exponent, and mantissa.
	1029  		var sign, bexp, mant uint64
	1030  		if x.neg {
	1031  			sign = 1 << (fbits - 1)
	1032  		}
	1033  
	1034  		// Rounding may have caused a denormal number to
	1035  		// become normal. Check again.
	1036  		if e < emin {
	1037  			// denormal number: recompute precision
	1038  			// Since rounding may have at best increased precision
	1039  			// and we have eliminated p <= 0 early, we know p > 0.
	1040  			// bexp == 0 for denormals
	1041  			p = mbits + 1 - emin + int(e)
	1042  			mant = msb64(r.mant) >> uint(fbits-p)
	1043  		} else {
	1044  			// normal number: emin <= e <= emax
	1045  			bexp = uint64(e+bias) << mbits
	1046  			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
	1047  		}
	1048  
	1049  		return math.Float64frombits(sign | bexp | mant), r.acc
	1050  
	1051  	case zero:
	1052  		if x.neg {
	1053  			var z float64
	1054  			return -z, Exact
	1055  		}
	1056  		return 0.0, Exact
	1057  
	1058  	case inf:
	1059  		if x.neg {
	1060  			return math.Inf(-1), Exact
	1061  		}
	1062  		return math.Inf(+1), Exact
	1063  	}
	1064  
	1065  	panic("unreachable")
	1066  }
	1067  
	1068  // Int returns the result of truncating x towards zero;
	1069  // or nil if x is an infinity.
	1070  // The result is Exact if x.IsInt(); otherwise it is Below
	1071  // for x > 0, and Above for x < 0.
	1072  // If a non-nil *Int argument z is provided, Int stores
	1073  // the result in z instead of allocating a new Int.
	1074  func (x *Float) Int(z *Int) (*Int, Accuracy) {
	1075  	if debugFloat {
	1076  		x.validate()
	1077  	}
	1078  
	1079  	if z == nil && x.form <= finite {
	1080  		z = new(Int)
	1081  	}
	1082  
	1083  	switch x.form {
	1084  	case finite:
	1085  		// 0 < |x| < +Inf
	1086  		acc := makeAcc(x.neg)
	1087  		if x.exp <= 0 {
	1088  			// 0 < |x| < 1
	1089  			return z.SetInt64(0), acc
	1090  		}
	1091  		// x.exp > 0
	1092  
	1093  		// 1 <= |x| < +Inf
	1094  		// determine minimum required precision for x
	1095  		allBits := uint(len(x.mant)) * _W
	1096  		exp := uint(x.exp)
	1097  		if x.MinPrec() <= exp {
	1098  			acc = Exact
	1099  		}
	1100  		// shift mantissa as needed
	1101  		if z == nil {
	1102  			z = new(Int)
	1103  		}
	1104  		z.neg = x.neg
	1105  		switch {
	1106  		case exp > allBits:
	1107  			z.abs = z.abs.shl(x.mant, exp-allBits)
	1108  		default:
	1109  			z.abs = z.abs.set(x.mant)
	1110  		case exp < allBits:
	1111  			z.abs = z.abs.shr(x.mant, allBits-exp)
	1112  		}
	1113  		return z, acc
	1114  
	1115  	case zero:
	1116  		return z.SetInt64(0), Exact
	1117  
	1118  	case inf:
	1119  		return nil, makeAcc(x.neg)
	1120  	}
	1121  
	1122  	panic("unreachable")
	1123  }
	1124  
	1125  // Rat returns the rational number corresponding to x;
	1126  // or nil if x is an infinity.
	1127  // The result is Exact if x is not an Inf.
	1128  // If a non-nil *Rat argument z is provided, Rat stores
	1129  // the result in z instead of allocating a new Rat.
	1130  func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
	1131  	if debugFloat {
	1132  		x.validate()
	1133  	}
	1134  
	1135  	if z == nil && x.form <= finite {
	1136  		z = new(Rat)
	1137  	}
	1138  
	1139  	switch x.form {
	1140  	case finite:
	1141  		// 0 < |x| < +Inf
	1142  		allBits := int32(len(x.mant)) * _W
	1143  		// build up numerator and denominator
	1144  		z.a.neg = x.neg
	1145  		switch {
	1146  		case x.exp > allBits:
	1147  			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
	1148  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
	1149  			// z already in normal form
	1150  		default:
	1151  			z.a.abs = z.a.abs.set(x.mant)
	1152  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
	1153  			// z already in normal form
	1154  		case x.exp < allBits:
	1155  			z.a.abs = z.a.abs.set(x.mant)
	1156  			t := z.b.abs.setUint64(1)
	1157  			z.b.abs = t.shl(t, uint(allBits-x.exp))
	1158  			z.norm()
	1159  		}
	1160  		return z, Exact
	1161  
	1162  	case zero:
	1163  		return z.SetInt64(0), Exact
	1164  
	1165  	case inf:
	1166  		return nil, makeAcc(x.neg)
	1167  	}
	1168  
	1169  	panic("unreachable")
	1170  }
	1171  
	1172  // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
	1173  // and returns z.
	1174  func (z *Float) Abs(x *Float) *Float {
	1175  	z.Set(x)
	1176  	z.neg = false
	1177  	return z
	1178  }
	1179  
	1180  // Neg sets z to the (possibly rounded) value of x with its sign negated,
	1181  // and returns z.
	1182  func (z *Float) Neg(x *Float) *Float {
	1183  	z.Set(x)
	1184  	z.neg = !z.neg
	1185  	return z
	1186  }
	1187  
	1188  func validateBinaryOperands(x, y *Float) {
	1189  	if !debugFloat {
	1190  		// avoid performance bugs
	1191  		panic("validateBinaryOperands called but debugFloat is not set")
	1192  	}
	1193  	if len(x.mant) == 0 {
	1194  		panic("empty mantissa for x")
	1195  	}
	1196  	if len(y.mant) == 0 {
	1197  		panic("empty mantissa for y")
	1198  	}
	1199  }
	1200  
	1201  // z = x + y, ignoring signs of x and y for the addition
	1202  // but using the sign of z for rounding the result.
	1203  // x and y must have a non-empty mantissa and valid exponent.
	1204  func (z *Float) uadd(x, y *Float) {
	1205  	// Note: This implementation requires 2 shifts most of the
	1206  	// time. It is also inefficient if exponents or precisions
	1207  	// differ by wide margins. The following article describes
	1208  	// an efficient (but much more complicated) implementation
	1209  	// compatible with the internal representation used here:
	1210  	//
	1211  	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
	1212  	// Point Addition With Exact Rounding (as in the MPFR Library)"
	1213  	// http://www.vinc17.net/research/papers/rnc6.pdf
	1214  
	1215  	if debugFloat {
	1216  		validateBinaryOperands(x, y)
	1217  	}
	1218  
	1219  	// compute exponents ex, ey for mantissa with "binary point"
	1220  	// on the right (mantissa.0) - use int64 to avoid overflow
	1221  	ex := int64(x.exp) - int64(len(x.mant))*_W
	1222  	ey := int64(y.exp) - int64(len(y.mant))*_W
	1223  
	1224  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
	1225  
	1226  	// TODO(gri) having a combined add-and-shift primitive
	1227  	//					 could make this code significantly faster
	1228  	switch {
	1229  	case ex < ey:
	1230  		if al {
	1231  			t := nat(nil).shl(y.mant, uint(ey-ex))
	1232  			z.mant = z.mant.add(x.mant, t)
	1233  		} else {
	1234  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
	1235  			z.mant = z.mant.add(x.mant, z.mant)
	1236  		}
	1237  	default:
	1238  		// ex == ey, no shift needed
	1239  		z.mant = z.mant.add(x.mant, y.mant)
	1240  	case ex > ey:
	1241  		if al {
	1242  			t := nat(nil).shl(x.mant, uint(ex-ey))
	1243  			z.mant = z.mant.add(t, y.mant)
	1244  		} else {
	1245  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
	1246  			z.mant = z.mant.add(z.mant, y.mant)
	1247  		}
	1248  		ex = ey
	1249  	}
	1250  	// len(z.mant) > 0
	1251  
	1252  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
	1253  }
	1254  
	1255  // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
	1256  // but using the sign of z for rounding the result.
	1257  // x and y must have a non-empty mantissa and valid exponent.
	1258  func (z *Float) usub(x, y *Float) {
	1259  	// This code is symmetric to uadd.
	1260  	// We have not factored the common code out because
	1261  	// eventually uadd (and usub) should be optimized
	1262  	// by special-casing, and the code will diverge.
	1263  
	1264  	if debugFloat {
	1265  		validateBinaryOperands(x, y)
	1266  	}
	1267  
	1268  	ex := int64(x.exp) - int64(len(x.mant))*_W
	1269  	ey := int64(y.exp) - int64(len(y.mant))*_W
	1270  
	1271  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
	1272  
	1273  	switch {
	1274  	case ex < ey:
	1275  		if al {
	1276  			t := nat(nil).shl(y.mant, uint(ey-ex))
	1277  			z.mant = t.sub(x.mant, t)
	1278  		} else {
	1279  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
	1280  			z.mant = z.mant.sub(x.mant, z.mant)
	1281  		}
	1282  	default:
	1283  		// ex == ey, no shift needed
	1284  		z.mant = z.mant.sub(x.mant, y.mant)
	1285  	case ex > ey:
	1286  		if al {
	1287  			t := nat(nil).shl(x.mant, uint(ex-ey))
	1288  			z.mant = t.sub(t, y.mant)
	1289  		} else {
	1290  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
	1291  			z.mant = z.mant.sub(z.mant, y.mant)
	1292  		}
	1293  		ex = ey
	1294  	}
	1295  
	1296  	// operands may have canceled each other out
	1297  	if len(z.mant) == 0 {
	1298  		z.acc = Exact
	1299  		z.form = zero
	1300  		z.neg = false
	1301  		return
	1302  	}
	1303  	// len(z.mant) > 0
	1304  
	1305  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
	1306  }
	1307  
	1308  // z = x * y, ignoring signs of x and y for the multiplication
	1309  // but using the sign of z for rounding the result.
	1310  // x and y must have a non-empty mantissa and valid exponent.
	1311  func (z *Float) umul(x, y *Float) {
	1312  	if debugFloat {
	1313  		validateBinaryOperands(x, y)
	1314  	}
	1315  
	1316  	// Note: This is doing too much work if the precision
	1317  	// of z is less than the sum of the precisions of x
	1318  	// and y which is often the case (e.g., if all floats
	1319  	// have the same precision).
	1320  	// TODO(gri) Optimize this for the common case.
	1321  
	1322  	e := int64(x.exp) + int64(y.exp)
	1323  	if x == y {
	1324  		z.mant = z.mant.sqr(x.mant)
	1325  	} else {
	1326  		z.mant = z.mant.mul(x.mant, y.mant)
	1327  	}
	1328  	z.setExpAndRound(e-fnorm(z.mant), 0)
	1329  }
	1330  
	1331  // z = x / y, ignoring signs of x and y for the division
	1332  // but using the sign of z for rounding the result.
	1333  // x and y must have a non-empty mantissa and valid exponent.
	1334  func (z *Float) uquo(x, y *Float) {
	1335  	if debugFloat {
	1336  		validateBinaryOperands(x, y)
	1337  	}
	1338  
	1339  	// mantissa length in words for desired result precision + 1
	1340  	// (at least one extra bit so we get the rounding bit after
	1341  	// the division)
	1342  	n := int(z.prec/_W) + 1
	1343  
	1344  	// compute adjusted x.mant such that we get enough result precision
	1345  	xadj := x.mant
	1346  	if d := n - len(x.mant) + len(y.mant); d > 0 {
	1347  		// d extra words needed => add d "0 digits" to x
	1348  		xadj = make(nat, len(x.mant)+d)
	1349  		copy(xadj[d:], x.mant)
	1350  	}
	1351  	// TODO(gri): If we have too many digits (d < 0), we should be able
	1352  	// to shorten x for faster division. But we must be extra careful
	1353  	// with rounding in that case.
	1354  
	1355  	// Compute d before division since there may be aliasing of x.mant
	1356  	// (via xadj) or y.mant with z.mant.
	1357  	d := len(xadj) - len(y.mant)
	1358  
	1359  	// divide
	1360  	var r nat
	1361  	z.mant, r = z.mant.div(nil, xadj, y.mant)
	1362  	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
	1363  
	1364  	// The result is long enough to include (at least) the rounding bit.
	1365  	// If there's a non-zero remainder, the corresponding fractional part
	1366  	// (if it were computed), would have a non-zero sticky bit (if it were
	1367  	// zero, it couldn't have a non-zero remainder).
	1368  	var sbit uint
	1369  	if len(r) > 0 {
	1370  		sbit = 1
	1371  	}
	1372  
	1373  	z.setExpAndRound(e-fnorm(z.mant), sbit)
	1374  }
	1375  
	1376  // ucmp returns -1, 0, or +1, depending on whether
	1377  // |x| < |y|, |x| == |y|, or |x| > |y|.
	1378  // x and y must have a non-empty mantissa and valid exponent.
	1379  func (x *Float) ucmp(y *Float) int {
	1380  	if debugFloat {
	1381  		validateBinaryOperands(x, y)
	1382  	}
	1383  
	1384  	switch {
	1385  	case x.exp < y.exp:
	1386  		return -1
	1387  	case x.exp > y.exp:
	1388  		return +1
	1389  	}
	1390  	// x.exp == y.exp
	1391  
	1392  	// compare mantissas
	1393  	i := len(x.mant)
	1394  	j := len(y.mant)
	1395  	for i > 0 || j > 0 {
	1396  		var xm, ym Word
	1397  		if i > 0 {
	1398  			i--
	1399  			xm = x.mant[i]
	1400  		}
	1401  		if j > 0 {
	1402  			j--
	1403  			ym = y.mant[j]
	1404  		}
	1405  		switch {
	1406  		case xm < ym:
	1407  			return -1
	1408  		case xm > ym:
	1409  			return +1
	1410  		}
	1411  	}
	1412  
	1413  	return 0
	1414  }
	1415  
	1416  // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
	1417  //
	1418  // When neither the inputs nor result are NaN, the sign of a product or
	1419  // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
	1420  // or of a difference x−y regarded as a sum x+(−y), differs from at most
	1421  // one of the addends’ signs; and the sign of the result of conversions,
	1422  // the quantize operation, the roundToIntegral operations, and the
	1423  // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
	1424  // These rules shall apply even when operands or results are zero or infinite.
	1425  //
	1426  // When the sum of two operands with opposite signs (or the difference of
	1427  // two operands with like signs) is exactly zero, the sign of that sum (or
	1428  // difference) shall be +0 in all rounding-direction attributes except
	1429  // roundTowardNegative; under that attribute, the sign of an exact zero
	1430  // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
	1431  // sign as x even when x is zero.
	1432  //
	1433  // See also: https://play.golang.org/p/RtH3UCt5IH
	1434  
	1435  // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
	1436  // it is changed to the larger of x's or y's precision before the operation.
	1437  // Rounding is performed according to z's precision and rounding mode; and
	1438  // z's accuracy reports the result error relative to the exact (not rounded)
	1439  // result. Add panics with ErrNaN if x and y are infinities with opposite
	1440  // signs. The value of z is undefined in that case.
	1441  func (z *Float) Add(x, y *Float) *Float {
	1442  	if debugFloat {
	1443  		x.validate()
	1444  		y.validate()
	1445  	}
	1446  
	1447  	if z.prec == 0 {
	1448  		z.prec = umax32(x.prec, y.prec)
	1449  	}
	1450  
	1451  	if x.form == finite && y.form == finite {
	1452  		// x + y (common case)
	1453  
	1454  		// Below we set z.neg = x.neg, and when z aliases y this will
	1455  		// change the y operand's sign. This is fine, because if an
	1456  		// operand aliases the receiver it'll be overwritten, but we still
	1457  		// want the original x.neg and y.neg values when we evaluate
	1458  		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
	1459  		yneg := y.neg
	1460  
	1461  		z.neg = x.neg
	1462  		if x.neg == yneg {
	1463  			// x + y == x + y
	1464  			// (-x) + (-y) == -(x + y)
	1465  			z.uadd(x, y)
	1466  		} else {
	1467  			// x + (-y) == x - y == -(y - x)
	1468  			// (-x) + y == y - x == -(x - y)
	1469  			if x.ucmp(y) > 0 {
	1470  				z.usub(x, y)
	1471  			} else {
	1472  				z.neg = !z.neg
	1473  				z.usub(y, x)
	1474  			}
	1475  		}
	1476  		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
	1477  			z.neg = true
	1478  		}
	1479  		return z
	1480  	}
	1481  
	1482  	if x.form == inf && y.form == inf && x.neg != y.neg {
	1483  		// +Inf + -Inf
	1484  		// -Inf + +Inf
	1485  		// value of z is undefined but make sure it's valid
	1486  		z.acc = Exact
	1487  		z.form = zero
	1488  		z.neg = false
	1489  		panic(ErrNaN{"addition of infinities with opposite signs"})
	1490  	}
	1491  
	1492  	if x.form == zero && y.form == zero {
	1493  		// ±0 + ±0
	1494  		z.acc = Exact
	1495  		z.form = zero
	1496  		z.neg = x.neg && y.neg // -0 + -0 == -0
	1497  		return z
	1498  	}
	1499  
	1500  	if x.form == inf || y.form == zero {
	1501  		// ±Inf + y
	1502  		// x + ±0
	1503  		return z.Set(x)
	1504  	}
	1505  
	1506  	// ±0 + y
	1507  	// x + ±Inf
	1508  	return z.Set(y)
	1509  }
	1510  
	1511  // Sub sets z to the rounded difference x-y and returns z.
	1512  // Precision, rounding, and accuracy reporting are as for Add.
	1513  // Sub panics with ErrNaN if x and y are infinities with equal
	1514  // signs. The value of z is undefined in that case.
	1515  func (z *Float) Sub(x, y *Float) *Float {
	1516  	if debugFloat {
	1517  		x.validate()
	1518  		y.validate()
	1519  	}
	1520  
	1521  	if z.prec == 0 {
	1522  		z.prec = umax32(x.prec, y.prec)
	1523  	}
	1524  
	1525  	if x.form == finite && y.form == finite {
	1526  		// x - y (common case)
	1527  		yneg := y.neg
	1528  		z.neg = x.neg
	1529  		if x.neg != yneg {
	1530  			// x - (-y) == x + y
	1531  			// (-x) - y == -(x + y)
	1532  			z.uadd(x, y)
	1533  		} else {
	1534  			// x - y == x - y == -(y - x)
	1535  			// (-x) - (-y) == y - x == -(x - y)
	1536  			if x.ucmp(y) > 0 {
	1537  				z.usub(x, y)
	1538  			} else {
	1539  				z.neg = !z.neg
	1540  				z.usub(y, x)
	1541  			}
	1542  		}
	1543  		if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact {
	1544  			z.neg = true
	1545  		}
	1546  		return z
	1547  	}
	1548  
	1549  	if x.form == inf && y.form == inf && x.neg == y.neg {
	1550  		// +Inf - +Inf
	1551  		// -Inf - -Inf
	1552  		// value of z is undefined but make sure it's valid
	1553  		z.acc = Exact
	1554  		z.form = zero
	1555  		z.neg = false
	1556  		panic(ErrNaN{"subtraction of infinities with equal signs"})
	1557  	}
	1558  
	1559  	if x.form == zero && y.form == zero {
	1560  		// ±0 - ±0
	1561  		z.acc = Exact
	1562  		z.form = zero
	1563  		z.neg = x.neg && !y.neg // -0 - +0 == -0
	1564  		return z
	1565  	}
	1566  
	1567  	if x.form == inf || y.form == zero {
	1568  		// ±Inf - y
	1569  		// x - ±0
	1570  		return z.Set(x)
	1571  	}
	1572  
	1573  	// ±0 - y
	1574  	// x - ±Inf
	1575  	return z.Neg(y)
	1576  }
	1577  
	1578  // Mul sets z to the rounded product x*y and returns z.
	1579  // Precision, rounding, and accuracy reporting are as for Add.
	1580  // Mul panics with ErrNaN if one operand is zero and the other
	1581  // operand an infinity. The value of z is undefined in that case.
	1582  func (z *Float) Mul(x, y *Float) *Float {
	1583  	if debugFloat {
	1584  		x.validate()
	1585  		y.validate()
	1586  	}
	1587  
	1588  	if z.prec == 0 {
	1589  		z.prec = umax32(x.prec, y.prec)
	1590  	}
	1591  
	1592  	z.neg = x.neg != y.neg
	1593  
	1594  	if x.form == finite && y.form == finite {
	1595  		// x * y (common case)
	1596  		z.umul(x, y)
	1597  		return z
	1598  	}
	1599  
	1600  	z.acc = Exact
	1601  	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
	1602  		// ±0 * ±Inf
	1603  		// ±Inf * ±0
	1604  		// value of z is undefined but make sure it's valid
	1605  		z.form = zero
	1606  		z.neg = false
	1607  		panic(ErrNaN{"multiplication of zero with infinity"})
	1608  	}
	1609  
	1610  	if x.form == inf || y.form == inf {
	1611  		// ±Inf * y
	1612  		// x * ±Inf
	1613  		z.form = inf
	1614  		return z
	1615  	}
	1616  
	1617  	// ±0 * y
	1618  	// x * ±0
	1619  	z.form = zero
	1620  	return z
	1621  }
	1622  
	1623  // Quo sets z to the rounded quotient x/y and returns z.
	1624  // Precision, rounding, and accuracy reporting are as for Add.
	1625  // Quo panics with ErrNaN if both operands are zero or infinities.
	1626  // The value of z is undefined in that case.
	1627  func (z *Float) Quo(x, y *Float) *Float {
	1628  	if debugFloat {
	1629  		x.validate()
	1630  		y.validate()
	1631  	}
	1632  
	1633  	if z.prec == 0 {
	1634  		z.prec = umax32(x.prec, y.prec)
	1635  	}
	1636  
	1637  	z.neg = x.neg != y.neg
	1638  
	1639  	if x.form == finite && y.form == finite {
	1640  		// x / y (common case)
	1641  		z.uquo(x, y)
	1642  		return z
	1643  	}
	1644  
	1645  	z.acc = Exact
	1646  	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
	1647  		// ±0 / ±0
	1648  		// ±Inf / ±Inf
	1649  		// value of z is undefined but make sure it's valid
	1650  		z.form = zero
	1651  		z.neg = false
	1652  		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
	1653  	}
	1654  
	1655  	if x.form == zero || y.form == inf {
	1656  		// ±0 / y
	1657  		// x / ±Inf
	1658  		z.form = zero
	1659  		return z
	1660  	}
	1661  
	1662  	// x / ±0
	1663  	// ±Inf / y
	1664  	z.form = inf
	1665  	return z
	1666  }
	1667  
	1668  // Cmp compares x and y and returns:
	1669  //
	1670  //	 -1 if x <	y
	1671  //		0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
	1672  //	 +1 if x >	y
	1673  //
	1674  func (x *Float) Cmp(y *Float) int {
	1675  	if debugFloat {
	1676  		x.validate()
	1677  		y.validate()
	1678  	}
	1679  
	1680  	mx := x.ord()
	1681  	my := y.ord()
	1682  	switch {
	1683  	case mx < my:
	1684  		return -1
	1685  	case mx > my:
	1686  		return +1
	1687  	}
	1688  	// mx == my
	1689  
	1690  	// only if |mx| == 1 we have to compare the mantissae
	1691  	switch mx {
	1692  	case -1:
	1693  		return y.ucmp(x)
	1694  	case +1:
	1695  		return x.ucmp(y)
	1696  	}
	1697  
	1698  	return 0
	1699  }
	1700  
	1701  // ord classifies x and returns:
	1702  //
	1703  //	-2 if -Inf == x
	1704  //	-1 if -Inf < x < 0
	1705  //	 0 if x == 0 (signed or unsigned)
	1706  //	+1 if 0 < x < +Inf
	1707  //	+2 if x == +Inf
	1708  //
	1709  func (x *Float) ord() int {
	1710  	var m int
	1711  	switch x.form {
	1712  	case finite:
	1713  		m = 1
	1714  	case zero:
	1715  		return 0
	1716  	case inf:
	1717  		m = 2
	1718  	}
	1719  	if x.neg {
	1720  		m = -m
	1721  	}
	1722  	return m
	1723  }
	1724  
	1725  func umax32(x, y uint32) uint32 {
	1726  	if x > y {
	1727  		return x
	1728  	}
	1729  	return y
	1730  }
	1731  

View as plain text