1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (https://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 "math/bits" 18 ) 19 20 const debugFloat = false // enable for debugging 21 22 // A nonzero finite Float represents a multi-precision floating point number 23 // 24 // sign × mantissa × 2**exponent 25 // 26 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 27 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 28 // All Floats are ordered, and the ordering of two Floats x and y 29 // is defined by x.Cmp(y). 30 // 31 // Each Float value also has a precision, rounding mode, and accuracy. 32 // The precision is the maximum number of mantissa bits available to 33 // represent the value. The rounding mode specifies how a result should 34 // be rounded to fit into the mantissa bits, and accuracy describes the 35 // rounding error with respect to the exact result. 36 // 37 // Unless specified otherwise, all operations (including setters) that 38 // specify a *Float variable for the result (usually via the receiver 39 // with the exception of MantExp), round the numeric result according 40 // to the precision and rounding mode of the result variable. 41 // 42 // If the provided result precision is 0 (see below), it is set to the 43 // precision of the argument with the largest precision value before any 44 // rounding takes place, and the rounding mode remains unchanged. Thus, 45 // uninitialized Floats provided as result arguments will have their 46 // precision set to a reasonable value determined by the operands, and 47 // their mode is the zero value for RoundingMode (ToNearestEven). 48 // 49 // By setting the desired precision to 24 or 53 and using matching rounding 50 // mode (typically ToNearestEven), Float operations produce the same results 51 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 52 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 53 // Exponent underflow and overflow lead to a 0 or an Infinity for different 54 // values than IEEE-754 because Float exponents have a much larger range. 55 // 56 // The zero (uninitialized) value for a Float is ready to use and represents 57 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 58 // 59 // Operations always take pointer arguments (*Float) rather 60 // than Float values, and each unique Float value requires 61 // its own unique *Float pointer. To "copy" a Float value, 62 // an existing (or newly allocated) Float must be set to 63 // a new value using the Float.Set method; shallow copies 64 // of Floats are not supported and may lead to errors. 65 type Float struct { 66 prec uint32 67 mode RoundingMode 68 acc Accuracy 69 form form 70 neg bool 71 mant nat 72 exp int32 73 } 74 75 // An ErrNaN panic is raised by a Float operation that would lead to 76 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 77 type ErrNaN struct { 78 msg string 79 } 80 81 func (err ErrNaN) Error() string { 82 return err.msg 83 } 84 85 // NewFloat allocates and returns a new Float set to x, 86 // with precision 53 and rounding mode ToNearestEven. 87 // NewFloat panics with ErrNaN if x is a NaN. 88 func NewFloat(x float64) *Float { 89 if math.IsNaN(x) { 90 panic(ErrNaN{"NewFloat(NaN)"}) 91 } 92 return new(Float).SetFloat64(x) 93 } 94 95 // Exponent and precision limits. 96 const ( 97 MaxExp = math.MaxInt32 // largest supported exponent 98 MinExp = math.MinInt32 // smallest supported exponent 99 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 100 ) 101 102 // Internal representation: The mantissa bits x.mant of a nonzero finite 103 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 104 // the slice may (but doesn't have to) be shorter if the mantissa contains 105 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 106 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 107 // trailing 0 bits or x.prec is not a multiple of the Word size _W, 108 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 109 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 110 // 111 // A zero or non-finite Float x ignores x.mant and x.exp. 112 // 113 // x form neg mant exp 114 // ---------------------------------------------------------- 115 // ±0 zero sign - - 116 // 0 < |x| < +Inf finite sign mantissa exponent 117 // ±Inf inf sign - - 118 119 // A form value describes the internal representation. 120 type form byte 121 122 // The form value order is relevant - do not change! 123 const ( 124 zero form = iota 125 finite 126 inf 127 ) 128 129 // RoundingMode determines how a Float value is rounded to the 130 // desired precision. Rounding may change the Float value; the 131 // rounding error is described by the Float's Accuracy. 132 type RoundingMode byte 133 134 // These constants define supported rounding modes. 135 const ( 136 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 137 ToNearestAway // == IEEE 754-2008 roundTiesToAway 138 ToZero // == IEEE 754-2008 roundTowardZero 139 AwayFromZero // no IEEE 754-2008 equivalent 140 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 141 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 142 ) 143 144 //go:generate stringer -type=RoundingMode 145 146 // Accuracy describes the rounding error produced by the most recent 147 // operation that generated a Float value, relative to the exact value. 148 type Accuracy int8 149 150 // Constants describing the Accuracy of a Float. 151 const ( 152 Below Accuracy = -1 153 Exact Accuracy = 0 154 Above Accuracy = +1 155 ) 156 157 //go:generate stringer -type=Accuracy 158 159 // SetPrec sets z's precision to prec and returns the (possibly) rounded 160 // value of z. Rounding occurs according to z's rounding mode if the mantissa 161 // cannot be represented in prec bits without loss of precision. 162 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 163 // If prec > MaxPrec, it is set to MaxPrec. 164 func (z *Float) SetPrec(prec uint) *Float { 165 z.acc = Exact // optimistically assume no rounding is needed 166 167 // special case 168 if prec == 0 { 169 z.prec = 0 170 if z.form == finite { 171 // truncate z to 0 172 z.acc = makeAcc(z.neg) 173 z.form = zero 174 } 175 return z 176 } 177 178 // general case 179 if prec > MaxPrec { 180 prec = MaxPrec 181 } 182 old := z.prec 183 z.prec = uint32(prec) 184 if z.prec < old { 185 z.round(0) 186 } 187 return z 188 } 189 190 func makeAcc(above bool) Accuracy { 191 if above { 192 return Above 193 } 194 return Below 195 } 196 197 // SetMode sets z's rounding mode to mode and returns an exact z. 198 // z remains unchanged otherwise. 199 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 200 func (z *Float) SetMode(mode RoundingMode) *Float { 201 z.mode = mode 202 z.acc = Exact 203 return z 204 } 205 206 // Prec returns the mantissa precision of x in bits. 207 // The result may be 0 for |x| == 0 and |x| == Inf. 208 func (x *Float) Prec() uint { 209 return uint(x.prec) 210 } 211 212 // MinPrec returns the minimum precision required to represent x exactly 213 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 214 // The result is 0 for |x| == 0 and |x| == Inf. 215 func (x *Float) MinPrec() uint { 216 if x.form != finite { 217 return 0 218 } 219 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 220 } 221 222 // Mode returns the rounding mode of x. 223 func (x *Float) Mode() RoundingMode { 224 return x.mode 225 } 226 227 // Acc returns the accuracy of x produced by the most recent 228 // operation, unless explicitly documented otherwise by that 229 // operation. 230 func (x *Float) Acc() Accuracy { 231 return x.acc 232 } 233 234 // Sign returns: 235 // 236 // -1 if x < 0 237 // 0 if x is ±0 238 // +1 if x > 0 239 // 240 func (x *Float) Sign() int { 241 if debugFloat { 242 x.validate() 243 } 244 if x.form == zero { 245 return 0 246 } 247 if x.neg { 248 return -1 249 } 250 return 1 251 } 252 253 // MantExp breaks x into its mantissa and exponent components 254 // and returns the exponent. If a non-nil mant argument is 255 // provided its value is set to the mantissa of x, with the 256 // same precision and rounding mode as x. The components 257 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 258 // Calling MantExp with a nil argument is an efficient way to 259 // get the exponent of the receiver. 260 // 261 // Special cases are: 262 // 263 // ( ±0).MantExp(mant) = 0, with mant set to ±0 264 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 265 // 266 // x and mant may be the same in which case x is set to its 267 // mantissa value. 268 func (x *Float) MantExp(mant *Float) (exp int) { 269 if debugFloat { 270 x.validate() 271 } 272 if x.form == finite { 273 exp = int(x.exp) 274 } 275 if mant != nil { 276 mant.Copy(x) 277 if mant.form == finite { 278 mant.exp = 0 279 } 280 } 281 return 282 } 283 284 func (z *Float) setExpAndRound(exp int64, sbit uint) { 285 if exp < MinExp { 286 // underflow 287 z.acc = makeAcc(z.neg) 288 z.form = zero 289 return 290 } 291 292 if exp > MaxExp { 293 // overflow 294 z.acc = makeAcc(!z.neg) 295 z.form = inf 296 return 297 } 298 299 z.form = finite 300 z.exp = int32(exp) 301 z.round(sbit) 302 } 303 304 // SetMantExp sets z to mant × 2**exp and returns z. 305 // The result z has the same precision and rounding mode 306 // as mant. SetMantExp is an inverse of MantExp but does 307 // not require 0.5 <= |mant| < 1.0. Specifically: 308 // 309 // mant := new(Float) 310 // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 311 // 312 // Special cases are: 313 // 314 // z.SetMantExp( ±0, exp) = ±0 315 // z.SetMantExp(±Inf, exp) = ±Inf 316 // 317 // z and mant may be the same in which case z's exponent 318 // is set to exp. 319 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 320 if debugFloat { 321 z.validate() 322 mant.validate() 323 } 324 z.Copy(mant) 325 326 if z.form == finite { 327 // 0 < |mant| < +Inf 328 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 329 } 330 return z 331 } 332 333 // Signbit reports whether x is negative or negative zero. 334 func (x *Float) Signbit() bool { 335 return x.neg 336 } 337 338 // IsInf reports whether x is +Inf or -Inf. 339 func (x *Float) IsInf() bool { 340 return x.form == inf 341 } 342 343 // IsInt reports whether x is an integer. 344 // ±Inf values are not integers. 345 func (x *Float) IsInt() bool { 346 if debugFloat { 347 x.validate() 348 } 349 // special cases 350 if x.form != finite { 351 return x.form == zero 352 } 353 // x.form == finite 354 if x.exp <= 0 { 355 return false 356 } 357 // x.exp > 0 358 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 359 } 360 361 // debugging support 362 func (x *Float) validate() { 363 if !debugFloat { 364 // avoid performance bugs 365 panic("validate called but debugFloat is not set") 366 } 367 if x.form != finite { 368 return 369 } 370 m := len(x.mant) 371 if m == 0 { 372 panic("nonzero finite number with empty mantissa") 373 } 374 const msb = 1 << (_W - 1) 375 if x.mant[m-1]&msb == 0 { 376 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 377 } 378 if x.prec == 0 { 379 panic("zero precision finite number") 380 } 381 } 382 383 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 384 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 385 // have before calling round. z's mantissa must be normalized (with the msb set) 386 // or empty. 387 // 388 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 389 // sign of z. For correct rounding, the sign of z must be set correctly before 390 // calling round. 391 func (z *Float) round(sbit uint) { 392 if debugFloat { 393 z.validate() 394 } 395 396 z.acc = Exact 397 if z.form != finite { 398 // ±0 or ±Inf => nothing left to do 399 return 400 } 401 // z.form == finite && len(z.mant) > 0 402 // m > 0 implies z.prec > 0 (checked by validate) 403 404 m := uint32(len(z.mant)) // present mantissa length in words 405 bits := m * _W // present mantissa bits; bits > 0 406 if bits <= z.prec { 407 // mantissa fits => nothing to do 408 return 409 } 410 // bits > z.prec 411 412 // Rounding is based on two bits: the rounding bit (rbit) and the 413 // sticky bit (sbit). The rbit is the bit immediately before the 414 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 415 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 416 // 417 // rbit sbit => "fractional part" 418 // 419 // 0 0 == 0 420 // 0 1 > 0 , < 0.5 421 // 1 0 == 0.5 422 // 1 1 > 0.5, < 1.0 423 424 // bits > z.prec: mantissa too large => round 425 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 426 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 427 // The sticky bit is only needed for rounding ToNearestEven 428 // or when the rounding bit is zero. Avoid computation otherwise. 429 if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { 430 sbit = z.mant.sticky(r) 431 } 432 sbit &= 1 // be safe and ensure it's a single bit 433 434 // cut off extra words 435 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 436 if m > n { 437 copy(z.mant, z.mant[m-n:]) // move n last words to front 438 z.mant = z.mant[:n] 439 } 440 441 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 442 ntz := n*_W - z.prec // 0 <= ntz < _W 443 lsb := Word(1) << ntz 444 445 // round if result is inexact 446 if rbit|sbit != 0 { 447 // Make rounding decision: The result mantissa is truncated ("rounded down") 448 // by default. Decide if we need to increment, or "round up", the (unsigned) 449 // mantissa. 450 inc := false 451 switch z.mode { 452 case ToNegativeInf: 453 inc = z.neg 454 case ToZero: 455 // nothing to do 456 case ToNearestEven: 457 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 458 case ToNearestAway: 459 inc = rbit != 0 460 case AwayFromZero: 461 inc = true 462 case ToPositiveInf: 463 inc = !z.neg 464 default: 465 panic("unreachable") 466 } 467 468 // A positive result (!z.neg) is Above the exact result if we increment, 469 // and it's Below if we truncate (Exact results require no rounding). 470 // For a negative result (z.neg) it is exactly the opposite. 471 z.acc = makeAcc(inc != z.neg) 472 473 if inc { 474 // add 1 to mantissa 475 if addVW(z.mant, z.mant, lsb) != 0 { 476 // mantissa overflow => adjust exponent 477 if z.exp >= MaxExp { 478 // exponent overflow 479 z.form = inf 480 return 481 } 482 z.exp++ 483 // adjust mantissa: divide by 2 to compensate for exponent adjustment 484 shrVU(z.mant, z.mant, 1) 485 // set msb == carry == 1 from the mantissa overflow above 486 const msb = 1 << (_W - 1) 487 z.mant[n-1] |= msb 488 } 489 } 490 } 491 492 // zero out trailing bits in least-significant word 493 z.mant[0] &^= lsb - 1 494 495 if debugFloat { 496 z.validate() 497 } 498 } 499 500 func (z *Float) setBits64(neg bool, x uint64) *Float { 501 if z.prec == 0 { 502 z.prec = 64 503 } 504 z.acc = Exact 505 z.neg = neg 506 if x == 0 { 507 z.form = zero 508 return z 509 } 510 // x != 0 511 z.form = finite 512 s := bits.LeadingZeros64(x) 513 z.mant = z.mant.setUint64(x << uint(s)) 514 z.exp = int32(64 - s) // always fits 515 if z.prec < 64 { 516 z.round(0) 517 } 518 return z 519 } 520 521 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 522 // If z's precision is 0, it is changed to 64 (and rounding will have 523 // no effect). 524 func (z *Float) SetUint64(x uint64) *Float { 525 return z.setBits64(false, x) 526 } 527 528 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 529 // If z's precision is 0, it is changed to 64 (and rounding will have 530 // no effect). 531 func (z *Float) SetInt64(x int64) *Float { 532 u := x 533 if u < 0 { 534 u = -u 535 } 536 // We cannot simply call z.SetUint64(uint64(u)) and change 537 // the sign afterwards because the sign affects rounding. 538 return z.setBits64(x < 0, uint64(u)) 539 } 540 541 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 542 // If z's precision is 0, it is changed to 53 (and rounding will have 543 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 544 func (z *Float) SetFloat64(x float64) *Float { 545 if z.prec == 0 { 546 z.prec = 53 547 } 548 if math.IsNaN(x) { 549 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 550 } 551 z.acc = Exact 552 z.neg = math.Signbit(x) // handle -0, -Inf correctly 553 if x == 0 { 554 z.form = zero 555 return z 556 } 557 if math.IsInf(x, 0) { 558 z.form = inf 559 return z 560 } 561 // normalized x != 0 562 z.form = finite 563 fmant, exp := math.Frexp(x) // get normalized mantissa 564 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 565 z.exp = int32(exp) // always fits 566 if z.prec < 53 { 567 z.round(0) 568 } 569 return z 570 } 571 572 // fnorm normalizes mantissa m by shifting it to the left 573 // such that the msb of the most-significant word (msw) is 1. 574 // It returns the shift amount. It assumes that len(m) != 0. 575 func fnorm(m nat) int64 { 576 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 577 panic("msw of mantissa is 0") 578 } 579 s := nlz(m[len(m)-1]) 580 if s > 0 { 581 c := shlVU(m, m, s) 582 if debugFloat && c != 0 { 583 panic("nlz or shlVU incorrect") 584 } 585 } 586 return int64(s) 587 } 588 589 // SetInt sets z to the (possibly rounded) value of x and returns z. 590 // If z's precision is 0, it is changed to the larger of x.BitLen() 591 // or 64 (and rounding will have no effect). 592 func (z *Float) SetInt(x *Int) *Float { 593 // TODO(gri) can be more efficient if z.prec > 0 594 // but small compared to the size of x, or if there 595 // are many trailing 0's. 596 bits := uint32(x.BitLen()) 597 if z.prec == 0 { 598 z.prec = umax32(bits, 64) 599 } 600 z.acc = Exact 601 z.neg = x.neg 602 if len(x.abs) == 0 { 603 z.form = zero 604 return z 605 } 606 // x != 0 607 z.mant = z.mant.set(x.abs) 608 fnorm(z.mant) 609 z.setExpAndRound(int64(bits), 0) 610 return z 611 } 612 613 // SetRat sets z to the (possibly rounded) value of x and returns z. 614 // If z's precision is 0, it is changed to the largest of a.BitLen(), 615 // b.BitLen(), or 64; with x = a/b. 616 func (z *Float) SetRat(x *Rat) *Float { 617 if x.IsInt() { 618 return z.SetInt(x.Num()) 619 } 620 var a, b Float 621 a.SetInt(x.Num()) 622 b.SetInt(x.Denom()) 623 if z.prec == 0 { 624 z.prec = umax32(a.prec, b.prec) 625 } 626 return z.Quo(&a, &b) 627 } 628 629 // SetInf sets z to the infinite Float -Inf if signbit is 630 // set, or +Inf if signbit is not set, and returns z. The 631 // precision of z is unchanged and the result is always 632 // Exact. 633 func (z *Float) SetInf(signbit bool) *Float { 634 z.acc = Exact 635 z.form = inf 636 z.neg = signbit 637 return z 638 } 639 640 // Set sets z to the (possibly rounded) value of x and returns z. 641 // If z's precision is 0, it is changed to the precision of x 642 // before setting z (and rounding will have no effect). 643 // Rounding is performed according to z's precision and rounding 644 // mode; and z's accuracy reports the result error relative to the 645 // exact (not rounded) result. 646 func (z *Float) Set(x *Float) *Float { 647 if debugFloat { 648 x.validate() 649 } 650 z.acc = Exact 651 if z != x { 652 z.form = x.form 653 z.neg = x.neg 654 if x.form == finite { 655 z.exp = x.exp 656 z.mant = z.mant.set(x.mant) 657 } 658 if z.prec == 0 { 659 z.prec = x.prec 660 } else if z.prec < x.prec { 661 z.round(0) 662 } 663 } 664 return z 665 } 666 667 // Copy sets z to x, with the same precision, rounding mode, and 668 // accuracy as x, and returns z. x is not changed even if z and 669 // x are the same. 670 func (z *Float) Copy(x *Float) *Float { 671 if debugFloat { 672 x.validate() 673 } 674 if z != x { 675 z.prec = x.prec 676 z.mode = x.mode 677 z.acc = x.acc 678 z.form = x.form 679 z.neg = x.neg 680 if z.form == finite { 681 z.mant = z.mant.set(x.mant) 682 z.exp = x.exp 683 } 684 } 685 return z 686 } 687 688 // msb32 returns the 32 most significant bits of x. 689 func msb32(x nat) uint32 { 690 i := len(x) - 1 691 if i < 0 { 692 return 0 693 } 694 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 695 panic("x not normalized") 696 } 697 switch _W { 698 case 32: 699 return uint32(x[i]) 700 case 64: 701 return uint32(x[i] >> 32) 702 } 703 panic("unreachable") 704 } 705 706 // msb64 returns the 64 most significant bits of x. 707 func msb64(x nat) uint64 { 708 i := len(x) - 1 709 if i < 0 { 710 return 0 711 } 712 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 713 panic("x not normalized") 714 } 715 switch _W { 716 case 32: 717 v := uint64(x[i]) << 32 718 if i > 0 { 719 v |= uint64(x[i-1]) 720 } 721 return v 722 case 64: 723 return uint64(x[i]) 724 } 725 panic("unreachable") 726 } 727 728 // Uint64 returns the unsigned integer resulting from truncating x 729 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 730 // if x is an integer and Below otherwise. 731 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 732 // for x > math.MaxUint64. 733 func (x *Float) Uint64() (uint64, Accuracy) { 734 if debugFloat { 735 x.validate() 736 } 737 738 switch x.form { 739 case finite: 740 if x.neg { 741 return 0, Above 742 } 743 // 0 < x < +Inf 744 if x.exp <= 0 { 745 // 0 < x < 1 746 return 0, Below 747 } 748 // 1 <= x < Inf 749 if x.exp <= 64 { 750 // u = trunc(x) fits into a uint64 751 u := msb64(x.mant) >> (64 - uint32(x.exp)) 752 if x.MinPrec() <= 64 { 753 return u, Exact 754 } 755 return u, Below // x truncated 756 } 757 // x too large 758 return math.MaxUint64, Below 759 760 case zero: 761 return 0, Exact 762 763 case inf: 764 if x.neg { 765 return 0, Above 766 } 767 return math.MaxUint64, Below 768 } 769 770 panic("unreachable") 771 } 772 773 // Int64 returns the integer resulting from truncating x towards zero. 774 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 775 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 776 // The result is (math.MinInt64, Above) for x < math.MinInt64, 777 // and (math.MaxInt64, Below) for x > math.MaxInt64. 778 func (x *Float) Int64() (int64, Accuracy) { 779 if debugFloat { 780 x.validate() 781 } 782 783 switch x.form { 784 case finite: 785 // 0 < |x| < +Inf 786 acc := makeAcc(x.neg) 787 if x.exp <= 0 { 788 // 0 < |x| < 1 789 return 0, acc 790 } 791 // x.exp > 0 792 793 // 1 <= |x| < +Inf 794 if x.exp <= 63 { 795 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 796 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 797 if x.neg { 798 i = -i 799 } 800 if x.MinPrec() <= uint(x.exp) { 801 return i, Exact 802 } 803 return i, acc // x truncated 804 } 805 if x.neg { 806 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 807 if x.exp == 64 && x.MinPrec() == 1 { 808 acc = Exact 809 } 810 return math.MinInt64, acc 811 } 812 // x too large 813 return math.MaxInt64, Below 814 815 case zero: 816 return 0, Exact 817 818 case inf: 819 if x.neg { 820 return math.MinInt64, Above 821 } 822 return math.MaxInt64, Below 823 } 824 825 panic("unreachable") 826 } 827 828 // Float32 returns the float32 value nearest to x. If x is too small to be 829 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 830 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 831 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 832 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 833 func (x *Float) Float32() (float32, Accuracy) { 834 if debugFloat { 835 x.validate() 836 } 837 838 switch x.form { 839 case finite: 840 // 0 < |x| < +Inf 841 842 const ( 843 fbits = 32 // float size 844 mbits = 23 // mantissa size (excluding implicit msb) 845 ebits = fbits - mbits - 1 // 8 exponent size 846 bias = 1<<(ebits-1) - 1 // 127 exponent bias 847 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 848 emin = 1 - bias // -126 smallest unbiased exponent (normal) 849 emax = bias // 127 largest unbiased exponent (normal) 850 ) 851 852 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 853 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 854 855 // Compute precision p for float32 mantissa. 856 // If the exponent is too small, we have a denormal number before 857 // rounding and fewer than p mantissa bits of precision available 858 // (the exponent remains fixed but the mantissa gets shifted right). 859 p := mbits + 1 // precision of normal float 860 if e < emin { 861 // recompute precision 862 p = mbits + 1 - emin + int(e) 863 // If p == 0, the mantissa of x is shifted so much to the right 864 // that its msb falls immediately to the right of the float32 865 // mantissa space. In other words, if the smallest denormal is 866 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 867 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 868 // If m == 0.5, it is rounded down to even, i.e., 0.0. 869 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 870 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 871 // underflow to ±0 872 if x.neg { 873 var z float32 874 return -z, Above 875 } 876 return 0.0, Below 877 } 878 // otherwise, round up 879 // We handle p == 0 explicitly because it's easy and because 880 // Float.round doesn't support rounding to 0 bits of precision. 881 if p == 0 { 882 if x.neg { 883 return -math.SmallestNonzeroFloat32, Below 884 } 885 return math.SmallestNonzeroFloat32, Above 886 } 887 } 888 // p > 0 889 890 // round 891 var r Float 892 r.prec = uint32(p) 893 r.Set(x) 894 e = r.exp - 1 895 896 // Rounding may have caused r to overflow to ±Inf 897 // (rounding never causes underflows to 0). 898 // If the exponent is too large, also overflow to ±Inf. 899 if r.form == inf || e > emax { 900 // overflow 901 if x.neg { 902 return float32(math.Inf(-1)), Below 903 } 904 return float32(math.Inf(+1)), Above 905 } 906 // e <= emax 907 908 // Determine sign, biased exponent, and mantissa. 909 var sign, bexp, mant uint32 910 if x.neg { 911 sign = 1 << (fbits - 1) 912 } 913 914 // Rounding may have caused a denormal number to 915 // become normal. Check again. 916 if e < emin { 917 // denormal number: recompute precision 918 // Since rounding may have at best increased precision 919 // and we have eliminated p <= 0 early, we know p > 0. 920 // bexp == 0 for denormals 921 p = mbits + 1 - emin + int(e) 922 mant = msb32(r.mant) >> uint(fbits-p) 923 } else { 924 // normal number: emin <= e <= emax 925 bexp = uint32(e+bias) << mbits 926 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 927 } 928 929 return math.Float32frombits(sign | bexp | mant), r.acc 930 931 case zero: 932 if x.neg { 933 var z float32 934 return -z, Exact 935 } 936 return 0.0, Exact 937 938 case inf: 939 if x.neg { 940 return float32(math.Inf(-1)), Exact 941 } 942 return float32(math.Inf(+1)), Exact 943 } 944 945 panic("unreachable") 946 } 947 948 // Float64 returns the float64 value nearest to x. If x is too small to be 949 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 950 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 951 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 952 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 953 func (x *Float) Float64() (float64, Accuracy) { 954 if debugFloat { 955 x.validate() 956 } 957 958 switch x.form { 959 case finite: 960 // 0 < |x| < +Inf 961 962 const ( 963 fbits = 64 // float size 964 mbits = 52 // mantissa size (excluding implicit msb) 965 ebits = fbits - mbits - 1 // 11 exponent size 966 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 967 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 968 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 969 emax = bias // 1023 largest unbiased exponent (normal) 970 ) 971 972 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 973 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 974 975 // Compute precision p for float64 mantissa. 976 // If the exponent is too small, we have a denormal number before 977 // rounding and fewer than p mantissa bits of precision available 978 // (the exponent remains fixed but the mantissa gets shifted right). 979 p := mbits + 1 // precision of normal float 980 if e < emin { 981 // recompute precision 982 p = mbits + 1 - emin + int(e) 983 // If p == 0, the mantissa of x is shifted so much to the right 984 // that its msb falls immediately to the right of the float64 985 // mantissa space. In other words, if the smallest denormal is 986 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 987 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 988 // If m == 0.5, it is rounded down to even, i.e., 0.0. 989 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 990 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 991 // underflow to ±0 992 if x.neg { 993 var z float64 994 return -z, Above 995 } 996 return 0.0, Below 997 } 998 // otherwise, round up 999 // We handle p == 0 explicitly because it's easy and because 1000 // Float.round doesn't support rounding to 0 bits of precision. 1001 if p == 0 { 1002 if x.neg { 1003 return -math.SmallestNonzeroFloat64, Below 1004 } 1005 return math.SmallestNonzeroFloat64, Above 1006 } 1007 } 1008 // p > 0 1009 1010 // round 1011 var r Float 1012 r.prec = uint32(p) 1013 r.Set(x) 1014 e = r.exp - 1 1015 1016 // Rounding may have caused r to overflow to ±Inf 1017 // (rounding never causes underflows to 0). 1018 // If the exponent is too large, also overflow to ±Inf. 1019 if r.form == inf || e > emax { 1020 // overflow 1021 if x.neg { 1022 return math.Inf(-1), Below 1023 } 1024 return math.Inf(+1), Above 1025 } 1026 // e <= emax 1027 1028 // Determine sign, biased exponent, and mantissa. 1029 var sign, bexp, mant uint64 1030 if x.neg { 1031 sign = 1 << (fbits - 1) 1032 } 1033 1034 // Rounding may have caused a denormal number to 1035 // become normal. Check again. 1036 if e < emin { 1037 // denormal number: recompute precision 1038 // Since rounding may have at best increased precision 1039 // and we have eliminated p <= 0 early, we know p > 0. 1040 // bexp == 0 for denormals 1041 p = mbits + 1 - emin + int(e) 1042 mant = msb64(r.mant) >> uint(fbits-p) 1043 } else { 1044 // normal number: emin <= e <= emax 1045 bexp = uint64(e+bias) << mbits 1046 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1047 } 1048 1049 return math.Float64frombits(sign | bexp | mant), r.acc 1050 1051 case zero: 1052 if x.neg { 1053 var z float64 1054 return -z, Exact 1055 } 1056 return 0.0, Exact 1057 1058 case inf: 1059 if x.neg { 1060 return math.Inf(-1), Exact 1061 } 1062 return math.Inf(+1), Exact 1063 } 1064 1065 panic("unreachable") 1066 } 1067 1068 // Int returns the result of truncating x towards zero; 1069 // or nil if x is an infinity. 1070 // The result is Exact if x.IsInt(); otherwise it is Below 1071 // for x > 0, and Above for x < 0. 1072 // If a non-nil *Int argument z is provided, Int stores 1073 // the result in z instead of allocating a new Int. 1074 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1075 if debugFloat { 1076 x.validate() 1077 } 1078 1079 if z == nil && x.form <= finite { 1080 z = new(Int) 1081 } 1082 1083 switch x.form { 1084 case finite: 1085 // 0 < |x| < +Inf 1086 acc := makeAcc(x.neg) 1087 if x.exp <= 0 { 1088 // 0 < |x| < 1 1089 return z.SetInt64(0), acc 1090 } 1091 // x.exp > 0 1092 1093 // 1 <= |x| < +Inf 1094 // determine minimum required precision for x 1095 allBits := uint(len(x.mant)) * _W 1096 exp := uint(x.exp) 1097 if x.MinPrec() <= exp { 1098 acc = Exact 1099 } 1100 // shift mantissa as needed 1101 if z == nil { 1102 z = new(Int) 1103 } 1104 z.neg = x.neg 1105 switch { 1106 case exp > allBits: 1107 z.abs = z.abs.shl(x.mant, exp-allBits) 1108 default: 1109 z.abs = z.abs.set(x.mant) 1110 case exp < allBits: 1111 z.abs = z.abs.shr(x.mant, allBits-exp) 1112 } 1113 return z, acc 1114 1115 case zero: 1116 return z.SetInt64(0), Exact 1117 1118 case inf: 1119 return nil, makeAcc(x.neg) 1120 } 1121 1122 panic("unreachable") 1123 } 1124 1125 // Rat returns the rational number corresponding to x; 1126 // or nil if x is an infinity. 1127 // The result is Exact if x is not an Inf. 1128 // If a non-nil *Rat argument z is provided, Rat stores 1129 // the result in z instead of allocating a new Rat. 1130 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1131 if debugFloat { 1132 x.validate() 1133 } 1134 1135 if z == nil && x.form <= finite { 1136 z = new(Rat) 1137 } 1138 1139 switch x.form { 1140 case finite: 1141 // 0 < |x| < +Inf 1142 allBits := int32(len(x.mant)) * _W 1143 // build up numerator and denominator 1144 z.a.neg = x.neg 1145 switch { 1146 case x.exp > allBits: 1147 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1148 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1149 // z already in normal form 1150 default: 1151 z.a.abs = z.a.abs.set(x.mant) 1152 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1153 // z already in normal form 1154 case x.exp < allBits: 1155 z.a.abs = z.a.abs.set(x.mant) 1156 t := z.b.abs.setUint64(1) 1157 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1158 z.norm() 1159 } 1160 return z, Exact 1161 1162 case zero: 1163 return z.SetInt64(0), Exact 1164 1165 case inf: 1166 return nil, makeAcc(x.neg) 1167 } 1168 1169 panic("unreachable") 1170 } 1171 1172 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1173 // and returns z. 1174 func (z *Float) Abs(x *Float) *Float { 1175 z.Set(x) 1176 z.neg = false 1177 return z 1178 } 1179 1180 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1181 // and returns z. 1182 func (z *Float) Neg(x *Float) *Float { 1183 z.Set(x) 1184 z.neg = !z.neg 1185 return z 1186 } 1187 1188 func validateBinaryOperands(x, y *Float) { 1189 if !debugFloat { 1190 // avoid performance bugs 1191 panic("validateBinaryOperands called but debugFloat is not set") 1192 } 1193 if len(x.mant) == 0 { 1194 panic("empty mantissa for x") 1195 } 1196 if len(y.mant) == 0 { 1197 panic("empty mantissa for y") 1198 } 1199 } 1200 1201 // z = x + y, ignoring signs of x and y for the addition 1202 // but using the sign of z for rounding the result. 1203 // x and y must have a non-empty mantissa and valid exponent. 1204 func (z *Float) uadd(x, y *Float) { 1205 // Note: This implementation requires 2 shifts most of the 1206 // time. It is also inefficient if exponents or precisions 1207 // differ by wide margins. The following article describes 1208 // an efficient (but much more complicated) implementation 1209 // compatible with the internal representation used here: 1210 // 1211 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1212 // Point Addition With Exact Rounding (as in the MPFR Library)" 1213 // http://www.vinc17.net/research/papers/rnc6.pdf 1214 1215 if debugFloat { 1216 validateBinaryOperands(x, y) 1217 } 1218 1219 // compute exponents ex, ey for mantissa with "binary point" 1220 // on the right (mantissa.0) - use int64 to avoid overflow 1221 ex := int64(x.exp) - int64(len(x.mant))*_W 1222 ey := int64(y.exp) - int64(len(y.mant))*_W 1223 1224 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1225 1226 // TODO(gri) having a combined add-and-shift primitive 1227 // could make this code significantly faster 1228 switch { 1229 case ex < ey: 1230 if al { 1231 t := nat(nil).shl(y.mant, uint(ey-ex)) 1232 z.mant = z.mant.add(x.mant, t) 1233 } else { 1234 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1235 z.mant = z.mant.add(x.mant, z.mant) 1236 } 1237 default: 1238 // ex == ey, no shift needed 1239 z.mant = z.mant.add(x.mant, y.mant) 1240 case ex > ey: 1241 if al { 1242 t := nat(nil).shl(x.mant, uint(ex-ey)) 1243 z.mant = z.mant.add(t, y.mant) 1244 } else { 1245 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1246 z.mant = z.mant.add(z.mant, y.mant) 1247 } 1248 ex = ey 1249 } 1250 // len(z.mant) > 0 1251 1252 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1253 } 1254 1255 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1256 // but using the sign of z for rounding the result. 1257 // x and y must have a non-empty mantissa and valid exponent. 1258 func (z *Float) usub(x, y *Float) { 1259 // This code is symmetric to uadd. 1260 // We have not factored the common code out because 1261 // eventually uadd (and usub) should be optimized 1262 // by special-casing, and the code will diverge. 1263 1264 if debugFloat { 1265 validateBinaryOperands(x, y) 1266 } 1267 1268 ex := int64(x.exp) - int64(len(x.mant))*_W 1269 ey := int64(y.exp) - int64(len(y.mant))*_W 1270 1271 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1272 1273 switch { 1274 case ex < ey: 1275 if al { 1276 t := nat(nil).shl(y.mant, uint(ey-ex)) 1277 z.mant = t.sub(x.mant, t) 1278 } else { 1279 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1280 z.mant = z.mant.sub(x.mant, z.mant) 1281 } 1282 default: 1283 // ex == ey, no shift needed 1284 z.mant = z.mant.sub(x.mant, y.mant) 1285 case ex > ey: 1286 if al { 1287 t := nat(nil).shl(x.mant, uint(ex-ey)) 1288 z.mant = t.sub(t, y.mant) 1289 } else { 1290 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1291 z.mant = z.mant.sub(z.mant, y.mant) 1292 } 1293 ex = ey 1294 } 1295 1296 // operands may have canceled each other out 1297 if len(z.mant) == 0 { 1298 z.acc = Exact 1299 z.form = zero 1300 z.neg = false 1301 return 1302 } 1303 // len(z.mant) > 0 1304 1305 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1306 } 1307 1308 // z = x * y, ignoring signs of x and y for the multiplication 1309 // but using the sign of z for rounding the result. 1310 // x and y must have a non-empty mantissa and valid exponent. 1311 func (z *Float) umul(x, y *Float) { 1312 if debugFloat { 1313 validateBinaryOperands(x, y) 1314 } 1315 1316 // Note: This is doing too much work if the precision 1317 // of z is less than the sum of the precisions of x 1318 // and y which is often the case (e.g., if all floats 1319 // have the same precision). 1320 // TODO(gri) Optimize this for the common case. 1321 1322 e := int64(x.exp) + int64(y.exp) 1323 if x == y { 1324 z.mant = z.mant.sqr(x.mant) 1325 } else { 1326 z.mant = z.mant.mul(x.mant, y.mant) 1327 } 1328 z.setExpAndRound(e-fnorm(z.mant), 0) 1329 } 1330 1331 // z = x / y, ignoring signs of x and y for the division 1332 // but using the sign of z for rounding the result. 1333 // x and y must have a non-empty mantissa and valid exponent. 1334 func (z *Float) uquo(x, y *Float) { 1335 if debugFloat { 1336 validateBinaryOperands(x, y) 1337 } 1338 1339 // mantissa length in words for desired result precision + 1 1340 // (at least one extra bit so we get the rounding bit after 1341 // the division) 1342 n := int(z.prec/_W) + 1 1343 1344 // compute adjusted x.mant such that we get enough result precision 1345 xadj := x.mant 1346 if d := n - len(x.mant) + len(y.mant); d > 0 { 1347 // d extra words needed => add d "0 digits" to x 1348 xadj = make(nat, len(x.mant)+d) 1349 copy(xadj[d:], x.mant) 1350 } 1351 // TODO(gri): If we have too many digits (d < 0), we should be able 1352 // to shorten x for faster division. But we must be extra careful 1353 // with rounding in that case. 1354 1355 // Compute d before division since there may be aliasing of x.mant 1356 // (via xadj) or y.mant with z.mant. 1357 d := len(xadj) - len(y.mant) 1358 1359 // divide 1360 var r nat 1361 z.mant, r = z.mant.div(nil, xadj, y.mant) 1362 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1363 1364 // The result is long enough to include (at least) the rounding bit. 1365 // If there's a non-zero remainder, the corresponding fractional part 1366 // (if it were computed), would have a non-zero sticky bit (if it were 1367 // zero, it couldn't have a non-zero remainder). 1368 var sbit uint 1369 if len(r) > 0 { 1370 sbit = 1 1371 } 1372 1373 z.setExpAndRound(e-fnorm(z.mant), sbit) 1374 } 1375 1376 // ucmp returns -1, 0, or +1, depending on whether 1377 // |x| < |y|, |x| == |y|, or |x| > |y|. 1378 // x and y must have a non-empty mantissa and valid exponent. 1379 func (x *Float) ucmp(y *Float) int { 1380 if debugFloat { 1381 validateBinaryOperands(x, y) 1382 } 1383 1384 switch { 1385 case x.exp < y.exp: 1386 return -1 1387 case x.exp > y.exp: 1388 return +1 1389 } 1390 // x.exp == y.exp 1391 1392 // compare mantissas 1393 i := len(x.mant) 1394 j := len(y.mant) 1395 for i > 0 || j > 0 { 1396 var xm, ym Word 1397 if i > 0 { 1398 i-- 1399 xm = x.mant[i] 1400 } 1401 if j > 0 { 1402 j-- 1403 ym = y.mant[j] 1404 } 1405 switch { 1406 case xm < ym: 1407 return -1 1408 case xm > ym: 1409 return +1 1410 } 1411 } 1412 1413 return 0 1414 } 1415 1416 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1417 // 1418 // When neither the inputs nor result are NaN, the sign of a product or 1419 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1420 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1421 // one of the addends’ signs; and the sign of the result of conversions, 1422 // the quantize operation, the roundToIntegral operations, and the 1423 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1424 // These rules shall apply even when operands or results are zero or infinite. 1425 // 1426 // When the sum of two operands with opposite signs (or the difference of 1427 // two operands with like signs) is exactly zero, the sign of that sum (or 1428 // difference) shall be +0 in all rounding-direction attributes except 1429 // roundTowardNegative; under that attribute, the sign of an exact zero 1430 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1431 // sign as x even when x is zero. 1432 // 1433 // See also: https://play.golang.org/p/RtH3UCt5IH 1434 1435 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1436 // it is changed to the larger of x's or y's precision before the operation. 1437 // Rounding is performed according to z's precision and rounding mode; and 1438 // z's accuracy reports the result error relative to the exact (not rounded) 1439 // result. Add panics with ErrNaN if x and y are infinities with opposite 1440 // signs. The value of z is undefined in that case. 1441 func (z *Float) Add(x, y *Float) *Float { 1442 if debugFloat { 1443 x.validate() 1444 y.validate() 1445 } 1446 1447 if z.prec == 0 { 1448 z.prec = umax32(x.prec, y.prec) 1449 } 1450 1451 if x.form == finite && y.form == finite { 1452 // x + y (common case) 1453 1454 // Below we set z.neg = x.neg, and when z aliases y this will 1455 // change the y operand's sign. This is fine, because if an 1456 // operand aliases the receiver it'll be overwritten, but we still 1457 // want the original x.neg and y.neg values when we evaluate 1458 // x.neg != y.neg, so we need to save y.neg before setting z.neg. 1459 yneg := y.neg 1460 1461 z.neg = x.neg 1462 if x.neg == yneg { 1463 // x + y == x + y 1464 // (-x) + (-y) == -(x + y) 1465 z.uadd(x, y) 1466 } else { 1467 // x + (-y) == x - y == -(y - x) 1468 // (-x) + y == y - x == -(x - y) 1469 if x.ucmp(y) > 0 { 1470 z.usub(x, y) 1471 } else { 1472 z.neg = !z.neg 1473 z.usub(y, x) 1474 } 1475 } 1476 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1477 z.neg = true 1478 } 1479 return z 1480 } 1481 1482 if x.form == inf && y.form == inf && x.neg != y.neg { 1483 // +Inf + -Inf 1484 // -Inf + +Inf 1485 // value of z is undefined but make sure it's valid 1486 z.acc = Exact 1487 z.form = zero 1488 z.neg = false 1489 panic(ErrNaN{"addition of infinities with opposite signs"}) 1490 } 1491 1492 if x.form == zero && y.form == zero { 1493 // ±0 + ±0 1494 z.acc = Exact 1495 z.form = zero 1496 z.neg = x.neg && y.neg // -0 + -0 == -0 1497 return z 1498 } 1499 1500 if x.form == inf || y.form == zero { 1501 // ±Inf + y 1502 // x + ±0 1503 return z.Set(x) 1504 } 1505 1506 // ±0 + y 1507 // x + ±Inf 1508 return z.Set(y) 1509 } 1510 1511 // Sub sets z to the rounded difference x-y and returns z. 1512 // Precision, rounding, and accuracy reporting are as for Add. 1513 // Sub panics with ErrNaN if x and y are infinities with equal 1514 // signs. The value of z is undefined in that case. 1515 func (z *Float) Sub(x, y *Float) *Float { 1516 if debugFloat { 1517 x.validate() 1518 y.validate() 1519 } 1520 1521 if z.prec == 0 { 1522 z.prec = umax32(x.prec, y.prec) 1523 } 1524 1525 if x.form == finite && y.form == finite { 1526 // x - y (common case) 1527 yneg := y.neg 1528 z.neg = x.neg 1529 if x.neg != yneg { 1530 // x - (-y) == x + y 1531 // (-x) - y == -(x + y) 1532 z.uadd(x, y) 1533 } else { 1534 // x - y == x - y == -(y - x) 1535 // (-x) - (-y) == y - x == -(x - y) 1536 if x.ucmp(y) > 0 { 1537 z.usub(x, y) 1538 } else { 1539 z.neg = !z.neg 1540 z.usub(y, x) 1541 } 1542 } 1543 if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { 1544 z.neg = true 1545 } 1546 return z 1547 } 1548 1549 if x.form == inf && y.form == inf && x.neg == y.neg { 1550 // +Inf - +Inf 1551 // -Inf - -Inf 1552 // value of z is undefined but make sure it's valid 1553 z.acc = Exact 1554 z.form = zero 1555 z.neg = false 1556 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1557 } 1558 1559 if x.form == zero && y.form == zero { 1560 // ±0 - ±0 1561 z.acc = Exact 1562 z.form = zero 1563 z.neg = x.neg && !y.neg // -0 - +0 == -0 1564 return z 1565 } 1566 1567 if x.form == inf || y.form == zero { 1568 // ±Inf - y 1569 // x - ±0 1570 return z.Set(x) 1571 } 1572 1573 // ±0 - y 1574 // x - ±Inf 1575 return z.Neg(y) 1576 } 1577 1578 // Mul sets z to the rounded product x*y and returns z. 1579 // Precision, rounding, and accuracy reporting are as for Add. 1580 // Mul panics with ErrNaN if one operand is zero and the other 1581 // operand an infinity. The value of z is undefined in that case. 1582 func (z *Float) Mul(x, y *Float) *Float { 1583 if debugFloat { 1584 x.validate() 1585 y.validate() 1586 } 1587 1588 if z.prec == 0 { 1589 z.prec = umax32(x.prec, y.prec) 1590 } 1591 1592 z.neg = x.neg != y.neg 1593 1594 if x.form == finite && y.form == finite { 1595 // x * y (common case) 1596 z.umul(x, y) 1597 return z 1598 } 1599 1600 z.acc = Exact 1601 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1602 // ±0 * ±Inf 1603 // ±Inf * ±0 1604 // value of z is undefined but make sure it's valid 1605 z.form = zero 1606 z.neg = false 1607 panic(ErrNaN{"multiplication of zero with infinity"}) 1608 } 1609 1610 if x.form == inf || y.form == inf { 1611 // ±Inf * y 1612 // x * ±Inf 1613 z.form = inf 1614 return z 1615 } 1616 1617 // ±0 * y 1618 // x * ±0 1619 z.form = zero 1620 return z 1621 } 1622 1623 // Quo sets z to the rounded quotient x/y and returns z. 1624 // Precision, rounding, and accuracy reporting are as for Add. 1625 // Quo panics with ErrNaN if both operands are zero or infinities. 1626 // The value of z is undefined in that case. 1627 func (z *Float) Quo(x, y *Float) *Float { 1628 if debugFloat { 1629 x.validate() 1630 y.validate() 1631 } 1632 1633 if z.prec == 0 { 1634 z.prec = umax32(x.prec, y.prec) 1635 } 1636 1637 z.neg = x.neg != y.neg 1638 1639 if x.form == finite && y.form == finite { 1640 // x / y (common case) 1641 z.uquo(x, y) 1642 return z 1643 } 1644 1645 z.acc = Exact 1646 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1647 // ±0 / ±0 1648 // ±Inf / ±Inf 1649 // value of z is undefined but make sure it's valid 1650 z.form = zero 1651 z.neg = false 1652 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1653 } 1654 1655 if x.form == zero || y.form == inf { 1656 // ±0 / y 1657 // x / ±Inf 1658 z.form = zero 1659 return z 1660 } 1661 1662 // x / ±0 1663 // ±Inf / y 1664 z.form = inf 1665 return z 1666 } 1667 1668 // Cmp compares x and y and returns: 1669 // 1670 // -1 if x < y 1671 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1672 // +1 if x > y 1673 // 1674 func (x *Float) Cmp(y *Float) int { 1675 if debugFloat { 1676 x.validate() 1677 y.validate() 1678 } 1679 1680 mx := x.ord() 1681 my := y.ord() 1682 switch { 1683 case mx < my: 1684 return -1 1685 case mx > my: 1686 return +1 1687 } 1688 // mx == my 1689 1690 // only if |mx| == 1 we have to compare the mantissae 1691 switch mx { 1692 case -1: 1693 return y.ucmp(x) 1694 case +1: 1695 return x.ucmp(y) 1696 } 1697 1698 return 0 1699 } 1700 1701 // ord classifies x and returns: 1702 // 1703 // -2 if -Inf == x 1704 // -1 if -Inf < x < 0 1705 // 0 if x == 0 (signed or unsigned) 1706 // +1 if 0 < x < +Inf 1707 // +2 if x == +Inf 1708 // 1709 func (x *Float) ord() int { 1710 var m int 1711 switch x.form { 1712 case finite: 1713 m = 1 1714 case zero: 1715 return 0 1716 case inf: 1717 m = 2 1718 } 1719 if x.neg { 1720 m = -m 1721 } 1722 return m 1723 } 1724 1725 func umax32(x, y uint32) uint32 { 1726 if x > y { 1727 return x 1728 } 1729 return y 1730 } 1731