natconv.go

Documentation: math/big

		 1  // Copyright 2015 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // This file implements nat-to-string conversion functions.
		 6  
		 7  package big
		 8  
		 9  import (
		10  	"errors"
		11  	"fmt"
		12  	"io"
		13  	"math"
		14  	"math/bits"
		15  	"sync"
		16  )
		17  
		18  const digits = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
		19  
		20  // Note: MaxBase = len(digits), but it must remain an untyped rune constant
		21  //			 for API compatibility.
		22  
		23  // MaxBase is the largest number base accepted for string conversions.
		24  const MaxBase = 10 + ('z' - 'a' + 1) + ('Z' - 'A' + 1)
		25  const maxBaseSmall = 10 + ('z' - 'a' + 1)
		26  
		27  // maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M.
		28  // For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word.
		29  // In other words, at most n digits in base b fit into a Word.
		30  // TODO(gri) replace this with a table, generated at build time.
		31  func maxPow(b Word) (p Word, n int) {
		32  	p, n = b, 1 // assuming b <= _M
		33  	for max := _M / b; p <= max; {
		34  		// p == b**n && p <= max
		35  		p *= b
		36  		n++
		37  	}
		38  	// p == b**n && p <= _M
		39  	return
		40  }
		41  
		42  // pow returns x**n for n > 0, and 1 otherwise.
		43  func pow(x Word, n int) (p Word) {
		44  	// n == sum of bi * 2**i, for 0 <= i < imax, and bi is 0 or 1
		45  	// thus x**n == product of x**(2**i) for all i where bi == 1
		46  	// (Russian Peasant Method for exponentiation)
		47  	p = 1
		48  	for n > 0 {
		49  		if n&1 != 0 {
		50  			p *= x
		51  		}
		52  		x *= x
		53  		n >>= 1
		54  	}
		55  	return
		56  }
		57  
		58  // scan errors
		59  var (
		60  	errNoDigits = errors.New("number has no digits")
		61  	errInvalSep = errors.New("'_' must separate successive digits")
		62  )
		63  
		64  // scan scans the number corresponding to the longest possible prefix
		65  // from r representing an unsigned number in a given conversion base.
		66  // scan returns the corresponding natural number res, the actual base b,
		67  // a digit count, and a read or syntax error err, if any.
		68  //
		69  // For base 0, an underscore character ``_'' may appear between a base
		70  // prefix and an adjacent digit, and between successive digits; such
		71  // underscores do not change the value of the number, or the returned
		72  // digit count. Incorrect placement of underscores is reported as an
		73  // error if there are no other errors. If base != 0, underscores are
		74  // not recognized and thus terminate scanning like any other character
		75  // that is not a valid radix point or digit.
		76  //
		77  //		 number		= mantissa | prefix pmantissa .
		78  //		 prefix		= "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
		79  //		 mantissa	= digits "." [ digits ] | digits | "." digits .
		80  //		 pmantissa = [ "_" ] digits "." [ digits ] | [ "_" ] digits | "." digits .
		81  //		 digits		= digit { [ "_" ] digit } .
		82  //		 digit		 = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
		83  //
		84  // Unless fracOk is set, the base argument must be 0 or a value between
		85  // 2 and MaxBase. If fracOk is set, the base argument must be one of
		86  // 0, 2, 8, 10, or 16. Providing an invalid base argument leads to a run-
		87  // time panic.
		88  //
		89  // For base 0, the number prefix determines the actual base: A prefix of
		90  // ``0b'' or ``0B'' selects base 2, ``0o'' or ``0O'' selects base 8, and
		91  // ``0x'' or ``0X'' selects base 16. If fracOk is false, a ``0'' prefix
		92  // (immediately followed by digits) selects base 8 as well. Otherwise,
		93  // the selected base is 10 and no prefix is accepted.
		94  //
		95  // If fracOk is set, a period followed by a fractional part is permitted.
		96  // The result value is computed as if there were no period present; and
		97  // the count value is used to determine the fractional part.
		98  //
		99  // For bases <= 36, lower and upper case letters are considered the same:
	 100  // The letters 'a' to 'z' and 'A' to 'Z' represent digit values 10 to 35.
	 101  // For bases > 36, the upper case letters 'A' to 'Z' represent the digit
	 102  // values 36 to 61.
	 103  //
	 104  // A result digit count > 0 corresponds to the number of (non-prefix) digits
	 105  // parsed. A digit count <= 0 indicates the presence of a period (if fracOk
	 106  // is set, only), and -count is the number of fractional digits found.
	 107  // In this case, the actual value of the scanned number is res * b**count.
	 108  //
	 109  func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
	 110  	// reject invalid bases
	 111  	baseOk := base == 0 ||
	 112  		!fracOk && 2 <= base && base <= MaxBase ||
	 113  		fracOk && (base == 2 || base == 8 || base == 10 || base == 16)
	 114  	if !baseOk {
	 115  		panic(fmt.Sprintf("invalid number base %d", base))
	 116  	}
	 117  
	 118  	// prev encodes the previously seen char: it is one
	 119  	// of '_', '0' (a digit), or '.' (anything else). A
	 120  	// valid separator '_' may only occur after a digit
	 121  	// and if base == 0.
	 122  	prev := '.'
	 123  	invalSep := false
	 124  
	 125  	// one char look-ahead
	 126  	ch, err := r.ReadByte()
	 127  
	 128  	// determine actual base
	 129  	b, prefix := base, 0
	 130  	if base == 0 {
	 131  		// actual base is 10 unless there's a base prefix
	 132  		b = 10
	 133  		if err == nil && ch == '0' {
	 134  			prev = '0'
	 135  			count = 1
	 136  			ch, err = r.ReadByte()
	 137  			if err == nil {
	 138  				// possibly one of 0b, 0B, 0o, 0O, 0x, 0X
	 139  				switch ch {
	 140  				case 'b', 'B':
	 141  					b, prefix = 2, 'b'
	 142  				case 'o', 'O':
	 143  					b, prefix = 8, 'o'
	 144  				case 'x', 'X':
	 145  					b, prefix = 16, 'x'
	 146  				default:
	 147  					if !fracOk {
	 148  						b, prefix = 8, '0'
	 149  					}
	 150  				}
	 151  				if prefix != 0 {
	 152  					count = 0 // prefix is not counted
	 153  					if prefix != '0' {
	 154  						ch, err = r.ReadByte()
	 155  					}
	 156  				}
	 157  			}
	 158  		}
	 159  	}
	 160  
	 161  	// convert string
	 162  	// Algorithm: Collect digits in groups of at most n digits in di
	 163  	// and then use mulAddWW for every such group to add them to the
	 164  	// result.
	 165  	z = z[:0]
	 166  	b1 := Word(b)
	 167  	bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
	 168  	di := Word(0)			 // 0 <= di < b1**i < bn
	 169  	i := 0							// 0 <= i < n
	 170  	dp := -1						// position of decimal point
	 171  	for err == nil {
	 172  		if ch == '.' && fracOk {
	 173  			fracOk = false
	 174  			if prev == '_' {
	 175  				invalSep = true
	 176  			}
	 177  			prev = '.'
	 178  			dp = count
	 179  		} else if ch == '_' && base == 0 {
	 180  			if prev != '0' {
	 181  				invalSep = true
	 182  			}
	 183  			prev = '_'
	 184  		} else {
	 185  			// convert rune into digit value d1
	 186  			var d1 Word
	 187  			switch {
	 188  			case '0' <= ch && ch <= '9':
	 189  				d1 = Word(ch - '0')
	 190  			case 'a' <= ch && ch <= 'z':
	 191  				d1 = Word(ch - 'a' + 10)
	 192  			case 'A' <= ch && ch <= 'Z':
	 193  				if b <= maxBaseSmall {
	 194  					d1 = Word(ch - 'A' + 10)
	 195  				} else {
	 196  					d1 = Word(ch - 'A' + maxBaseSmall)
	 197  				}
	 198  			default:
	 199  				d1 = MaxBase + 1
	 200  			}
	 201  			if d1 >= b1 {
	 202  				r.UnreadByte() // ch does not belong to number anymore
	 203  				break
	 204  			}
	 205  			prev = '0'
	 206  			count++
	 207  
	 208  			// collect d1 in di
	 209  			di = di*b1 + d1
	 210  			i++
	 211  
	 212  			// if di is "full", add it to the result
	 213  			if i == n {
	 214  				z = z.mulAddWW(z, bn, di)
	 215  				di = 0
	 216  				i = 0
	 217  			}
	 218  		}
	 219  
	 220  		ch, err = r.ReadByte()
	 221  	}
	 222  
	 223  	if err == io.EOF {
	 224  		err = nil
	 225  	}
	 226  
	 227  	// other errors take precedence over invalid separators
	 228  	if err == nil && (invalSep || prev == '_') {
	 229  		err = errInvalSep
	 230  	}
	 231  
	 232  	if count == 0 {
	 233  		// no digits found
	 234  		if prefix == '0' {
	 235  			// there was only the octal prefix 0 (possibly followed by separators and digits > 7);
	 236  			// interpret as decimal 0
	 237  			return z[:0], 10, 1, err
	 238  		}
	 239  		err = errNoDigits // fall through; result will be 0
	 240  	}
	 241  
	 242  	// add remaining digits to result
	 243  	if i > 0 {
	 244  		z = z.mulAddWW(z, pow(b1, i), di)
	 245  	}
	 246  	res = z.norm()
	 247  
	 248  	// adjust count for fraction, if any
	 249  	if dp >= 0 {
	 250  		// 0 <= dp <= count
	 251  		count = dp - count
	 252  	}
	 253  
	 254  	return
	 255  }
	 256  
	 257  // utoa converts x to an ASCII representation in the given base;
	 258  // base must be between 2 and MaxBase, inclusive.
	 259  func (x nat) utoa(base int) []byte {
	 260  	return x.itoa(false, base)
	 261  }
	 262  
	 263  // itoa is like utoa but it prepends a '-' if neg && x != 0.
	 264  func (x nat) itoa(neg bool, base int) []byte {
	 265  	if base < 2 || base > MaxBase {
	 266  		panic("invalid base")
	 267  	}
	 268  
	 269  	// x == 0
	 270  	if len(x) == 0 {
	 271  		return []byte("0")
	 272  	}
	 273  	// len(x) > 0
	 274  
	 275  	// allocate buffer for conversion
	 276  	i := int(float64(x.bitLen())/math.Log2(float64(base))) + 1 // off by 1 at most
	 277  	if neg {
	 278  		i++
	 279  	}
	 280  	s := make([]byte, i)
	 281  
	 282  	// convert power of two and non power of two bases separately
	 283  	if b := Word(base); b == b&-b {
	 284  		// shift is base b digit size in bits
	 285  		shift := uint(bits.TrailingZeros(uint(b))) // shift > 0 because b >= 2
	 286  		mask := Word(1<<shift - 1)
	 287  		w := x[0]				 // current word
	 288  		nbits := uint(_W) // number of unprocessed bits in w
	 289  
	 290  		// convert less-significant words (include leading zeros)
	 291  		for k := 1; k < len(x); k++ {
	 292  			// convert full digits
	 293  			for nbits >= shift {
	 294  				i--
	 295  				s[i] = digits[w&mask]
	 296  				w >>= shift
	 297  				nbits -= shift
	 298  			}
	 299  
	 300  			// convert any partial leading digit and advance to next word
	 301  			if nbits == 0 {
	 302  				// no partial digit remaining, just advance
	 303  				w = x[k]
	 304  				nbits = _W
	 305  			} else {
	 306  				// partial digit in current word w (== x[k-1]) and next word x[k]
	 307  				w |= x[k] << nbits
	 308  				i--
	 309  				s[i] = digits[w&mask]
	 310  
	 311  				// advance
	 312  				w = x[k] >> (shift - nbits)
	 313  				nbits = _W - (shift - nbits)
	 314  			}
	 315  		}
	 316  
	 317  		// convert digits of most-significant word w (omit leading zeros)
	 318  		for w != 0 {
	 319  			i--
	 320  			s[i] = digits[w&mask]
	 321  			w >>= shift
	 322  		}
	 323  
	 324  	} else {
	 325  		bb, ndigits := maxPow(b)
	 326  
	 327  		// construct table of successive squares of bb*leafSize to use in subdivisions
	 328  		// result (table != nil) <=> (len(x) > leafSize > 0)
	 329  		table := divisors(len(x), b, ndigits, bb)
	 330  
	 331  		// preserve x, create local copy for use by convertWords
	 332  		q := nat(nil).set(x)
	 333  
	 334  		// convert q to string s in base b
	 335  		q.convertWords(s, b, ndigits, bb, table)
	 336  
	 337  		// strip leading zeros
	 338  		// (x != 0; thus s must contain at least one non-zero digit
	 339  		// and the loop will terminate)
	 340  		i = 0
	 341  		for s[i] == '0' {
	 342  			i++
	 343  		}
	 344  	}
	 345  
	 346  	if neg {
	 347  		i--
	 348  		s[i] = '-'
	 349  	}
	 350  
	 351  	return s[i:]
	 352  }
	 353  
	 354  // Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
	 355  // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
	 356  // repeated nat/Word division.
	 357  //
	 358  // The iterative method processes n Words by n divW() calls, each of which visits every Word in the
	 359  // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
	 360  // Recursive conversion divides q by its approximate square root, yielding two parts, each half
	 361  // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
	 362  // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
	 363  // is made better by splitting the subblocks recursively. Best is to split blocks until one more
	 364  // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
	 365  // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
	 366  // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
	 367  // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
	 368  // specific hardware.
	 369  //
	 370  func (q nat) convertWords(s []byte, b Word, ndigits int, bb Word, table []divisor) {
	 371  	// split larger blocks recursively
	 372  	if table != nil {
	 373  		// len(q) > leafSize > 0
	 374  		var r nat
	 375  		index := len(table) - 1
	 376  		for len(q) > leafSize {
	 377  			// find divisor close to sqrt(q) if possible, but in any case < q
	 378  			maxLength := q.bitLen()		 // ~= log2 q, or at of least largest possible q of this bit length
	 379  			minLength := maxLength >> 1 // ~= log2 sqrt(q)
	 380  			for index > 0 && table[index-1].nbits > minLength {
	 381  				index-- // desired
	 382  			}
	 383  			if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
	 384  				index--
	 385  				if index < 0 {
	 386  					panic("internal inconsistency")
	 387  				}
	 388  			}
	 389  
	 390  			// split q into the two digit number (q'*bbb + r) to form independent subblocks
	 391  			q, r = q.div(r, q, table[index].bbb)
	 392  
	 393  			// convert subblocks and collect results in s[:h] and s[h:]
	 394  			h := len(s) - table[index].ndigits
	 395  			r.convertWords(s[h:], b, ndigits, bb, table[0:index])
	 396  			s = s[:h] // == q.convertWords(s, b, ndigits, bb, table[0:index+1])
	 397  		}
	 398  	}
	 399  
	 400  	// having split any large blocks now process the remaining (small) block iteratively
	 401  	i := len(s)
	 402  	var r Word
	 403  	if b == 10 {
	 404  		// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
	 405  		for len(q) > 0 {
	 406  			// extract least significant, base bb "digit"
	 407  			q, r = q.divW(q, bb)
	 408  			for j := 0; j < ndigits && i > 0; j++ {
	 409  				i--
	 410  				// avoid % computation since r%10 == r - int(r/10)*10;
	 411  				// this appears to be faster for BenchmarkString10000Base10
	 412  				// and smaller strings (but a bit slower for larger ones)
	 413  				t := r / 10
	 414  				s[i] = '0' + byte(r-t*10)
	 415  				r = t
	 416  			}
	 417  		}
	 418  	} else {
	 419  		for len(q) > 0 {
	 420  			// extract least significant, base bb "digit"
	 421  			q, r = q.divW(q, bb)
	 422  			for j := 0; j < ndigits && i > 0; j++ {
	 423  				i--
	 424  				s[i] = digits[r%b]
	 425  				r /= b
	 426  			}
	 427  		}
	 428  	}
	 429  
	 430  	// prepend high-order zeros
	 431  	for i > 0 { // while need more leading zeros
	 432  		i--
	 433  		s[i] = '0'
	 434  	}
	 435  }
	 436  
	 437  // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
	 438  // Benchmark and configure leafSize using: go test -bench="Leaf"
	 439  //	 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
	 440  //	 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
	 441  var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
	 442  
	 443  type divisor struct {
	 444  	bbb		 nat // divisor
	 445  	nbits	 int // bit length of divisor (discounting leading zeros) ~= log2(bbb)
	 446  	ndigits int // digit length of divisor in terms of output base digits
	 447  }
	 448  
	 449  var cacheBase10 struct {
	 450  	sync.Mutex
	 451  	table [64]divisor // cached divisors for base 10
	 452  }
	 453  
	 454  // expWW computes x**y
	 455  func (z nat) expWW(x, y Word) nat {
	 456  	return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
	 457  }
	 458  
	 459  // construct table of powers of bb*leafSize to use in subdivisions
	 460  func divisors(m int, b Word, ndigits int, bb Word) []divisor {
	 461  	// only compute table when recursive conversion is enabled and x is large
	 462  	if leafSize == 0 || m <= leafSize {
	 463  		return nil
	 464  	}
	 465  
	 466  	// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
	 467  	k := 1
	 468  	for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
	 469  		k++
	 470  	}
	 471  
	 472  	// reuse and extend existing table of divisors or create new table as appropriate
	 473  	var table []divisor // for b == 10, table overlaps with cacheBase10.table
	 474  	if b == 10 {
	 475  		cacheBase10.Lock()
	 476  		table = cacheBase10.table[0:k] // reuse old table for this conversion
	 477  	} else {
	 478  		table = make([]divisor, k) // create new table for this conversion
	 479  	}
	 480  
	 481  	// extend table
	 482  	if table[k-1].ndigits == 0 {
	 483  		// add new entries as needed
	 484  		var larger nat
	 485  		for i := 0; i < k; i++ {
	 486  			if table[i].ndigits == 0 {
	 487  				if i == 0 {
	 488  					table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
	 489  					table[0].ndigits = ndigits * leafSize
	 490  				} else {
	 491  					table[i].bbb = nat(nil).sqr(table[i-1].bbb)
	 492  					table[i].ndigits = 2 * table[i-1].ndigits
	 493  				}
	 494  
	 495  				// optimization: exploit aggregated extra bits in macro blocks
	 496  				larger = nat(nil).set(table[i].bbb)
	 497  				for mulAddVWW(larger, larger, b, 0) == 0 {
	 498  					table[i].bbb = table[i].bbb.set(larger)
	 499  					table[i].ndigits++
	 500  				}
	 501  
	 502  				table[i].nbits = table[i].bbb.bitLen()
	 503  			}
	 504  		}
	 505  	}
	 506  
	 507  	if b == 10 {
	 508  		cacheBase10.Unlock()
	 509  	}
	 510  
	 511  	return table
	 512  }
	 513
View as plain text