atoi.go

Documentation: strconv

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package strconv
		 6  
		 7  import "errors"
		 8  
		 9  // lower(c) is a lower-case letter if and only if
		10  // c is either that lower-case letter or the equivalent upper-case letter.
		11  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
		12  // Note that lower of non-letters can produce other non-letters.
		13  func lower(c byte) byte {
		14  	return c | ('x' - 'X')
		15  }
		16  
		17  // ErrRange indicates that a value is out of range for the target type.
		18  var ErrRange = errors.New("value out of range")
		19  
		20  // ErrSyntax indicates that a value does not have the right syntax for the target type.
		21  var ErrSyntax = errors.New("invalid syntax")
		22  
		23  // A NumError records a failed conversion.
		24  type NumError struct {
		25  	Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex)
		26  	Num	string // the input
		27  	Err	error	// the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
		28  }
		29  
		30  func (e *NumError) Error() string {
		31  	return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
		32  }
		33  
		34  func (e *NumError) Unwrap() error { return e.Err }
		35  
		36  func syntaxError(fn, str string) *NumError {
		37  	return &NumError{fn, str, ErrSyntax}
		38  }
		39  
		40  func rangeError(fn, str string) *NumError {
		41  	return &NumError{fn, str, ErrRange}
		42  }
		43  
		44  func baseError(fn, str string, base int) *NumError {
		45  	return &NumError{fn, str, errors.New("invalid base " + Itoa(base))}
		46  }
		47  
		48  func bitSizeError(fn, str string, bitSize int) *NumError {
		49  	return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))}
		50  }
		51  
		52  const intSize = 32 << (^uint(0) >> 63)
		53  
		54  // IntSize is the size in bits of an int or uint value.
		55  const IntSize = intSize
		56  
		57  const maxUint64 = 1<<64 - 1
		58  
		59  // ParseUint is like ParseInt but for unsigned numbers.
		60  //
		61  // A sign prefix is not permitted.
		62  func ParseUint(s string, base int, bitSize int) (uint64, error) {
		63  	const fnParseUint = "ParseUint"
		64  
		65  	if s == "" {
		66  		return 0, syntaxError(fnParseUint, s)
		67  	}
		68  
		69  	base0 := base == 0
		70  
		71  	s0 := s
		72  	switch {
		73  	case 2 <= base && base <= 36:
		74  		// valid base; nothing to do
		75  
		76  	case base == 0:
		77  		// Look for octal, hex prefix.
		78  		base = 10
		79  		if s[0] == '0' {
		80  			switch {
		81  			case len(s) >= 3 && lower(s[1]) == 'b':
		82  				base = 2
		83  				s = s[2:]
		84  			case len(s) >= 3 && lower(s[1]) == 'o':
		85  				base = 8
		86  				s = s[2:]
		87  			case len(s) >= 3 && lower(s[1]) == 'x':
		88  				base = 16
		89  				s = s[2:]
		90  			default:
		91  				base = 8
		92  				s = s[1:]
		93  			}
		94  		}
		95  
		96  	default:
		97  		return 0, baseError(fnParseUint, s0, base)
		98  	}
		99  
	 100  	if bitSize == 0 {
	 101  		bitSize = IntSize
	 102  	} else if bitSize < 0 || bitSize > 64 {
	 103  		return 0, bitSizeError(fnParseUint, s0, bitSize)
	 104  	}
	 105  
	 106  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	 107  	// Use compile-time constants for common cases.
	 108  	var cutoff uint64
	 109  	switch base {
	 110  	case 10:
	 111  		cutoff = maxUint64/10 + 1
	 112  	case 16:
	 113  		cutoff = maxUint64/16 + 1
	 114  	default:
	 115  		cutoff = maxUint64/uint64(base) + 1
	 116  	}
	 117  
	 118  	maxVal := uint64(1)<<uint(bitSize) - 1
	 119  
	 120  	underscores := false
	 121  	var n uint64
	 122  	for _, c := range []byte(s) {
	 123  		var d byte
	 124  		switch {
	 125  		case c == '_' && base0:
	 126  			underscores = true
	 127  			continue
	 128  		case '0' <= c && c <= '9':
	 129  			d = c - '0'
	 130  		case 'a' <= lower(c) && lower(c) <= 'z':
	 131  			d = lower(c) - 'a' + 10
	 132  		default:
	 133  			return 0, syntaxError(fnParseUint, s0)
	 134  		}
	 135  
	 136  		if d >= byte(base) {
	 137  			return 0, syntaxError(fnParseUint, s0)
	 138  		}
	 139  
	 140  		if n >= cutoff {
	 141  			// n*base overflows
	 142  			return maxVal, rangeError(fnParseUint, s0)
	 143  		}
	 144  		n *= uint64(base)
	 145  
	 146  		n1 := n + uint64(d)
	 147  		if n1 < n || n1 > maxVal {
	 148  			// n+d overflows
	 149  			return maxVal, rangeError(fnParseUint, s0)
	 150  		}
	 151  		n = n1
	 152  	}
	 153  
	 154  	if underscores && !underscoreOK(s0) {
	 155  		return 0, syntaxError(fnParseUint, s0)
	 156  	}
	 157  
	 158  	return n, nil
	 159  }
	 160  
	 161  // ParseInt interprets a string s in the given base (0, 2 to 36) and
	 162  // bit size (0 to 64) and returns the corresponding value i.
	 163  //
	 164  // The string may begin with a leading sign: "+" or "-".
	 165  //
	 166  // If the base argument is 0, the true base is implied by the string's
	 167  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
	 168  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
	 169  // underscore characters are permitted as defined by the Go syntax for
	 170  // integer literals.
	 171  //
	 172  // The bitSize argument specifies the integer type
	 173  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
	 174  // correspond to int, int8, int16, int32, and int64.
	 175  // If bitSize is below 0 or above 64, an error is returned.
	 176  //
	 177  // The errors that ParseInt returns have concrete type *NumError
	 178  // and include err.Num = s. If s is empty or contains invalid
	 179  // digits, err.Err = ErrSyntax and the returned value is 0;
	 180  // if the value corresponding to s cannot be represented by a
	 181  // signed integer of the given size, err.Err = ErrRange and the
	 182  // returned value is the maximum magnitude integer of the
	 183  // appropriate bitSize and sign.
	 184  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
	 185  	const fnParseInt = "ParseInt"
	 186  
	 187  	if s == "" {
	 188  		return 0, syntaxError(fnParseInt, s)
	 189  	}
	 190  
	 191  	// Pick off leading sign.
	 192  	s0 := s
	 193  	neg := false
	 194  	if s[0] == '+' {
	 195  		s = s[1:]
	 196  	} else if s[0] == '-' {
	 197  		neg = true
	 198  		s = s[1:]
	 199  	}
	 200  
	 201  	// Convert unsigned and check range.
	 202  	var un uint64
	 203  	un, err = ParseUint(s, base, bitSize)
	 204  	if err != nil && err.(*NumError).Err != ErrRange {
	 205  		err.(*NumError).Func = fnParseInt
	 206  		err.(*NumError).Num = s0
	 207  		return 0, err
	 208  	}
	 209  
	 210  	if bitSize == 0 {
	 211  		bitSize = IntSize
	 212  	}
	 213  
	 214  	cutoff := uint64(1 << uint(bitSize-1))
	 215  	if !neg && un >= cutoff {
	 216  		return int64(cutoff - 1), rangeError(fnParseInt, s0)
	 217  	}
	 218  	if neg && un > cutoff {
	 219  		return -int64(cutoff), rangeError(fnParseInt, s0)
	 220  	}
	 221  	n := int64(un)
	 222  	if neg {
	 223  		n = -n
	 224  	}
	 225  	return n, nil
	 226  }
	 227  
	 228  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
	 229  func Atoi(s string) (int, error) {
	 230  	const fnAtoi = "Atoi"
	 231  
	 232  	sLen := len(s)
	 233  	if intSize == 32 && (0 < sLen && sLen < 10) ||
	 234  		intSize == 64 && (0 < sLen && sLen < 19) {
	 235  		// Fast path for small integers that fit int type.
	 236  		s0 := s
	 237  		if s[0] == '-' || s[0] == '+' {
	 238  			s = s[1:]
	 239  			if len(s) < 1 {
	 240  				return 0, &NumError{fnAtoi, s0, ErrSyntax}
	 241  			}
	 242  		}
	 243  
	 244  		n := 0
	 245  		for _, ch := range []byte(s) {
	 246  			ch -= '0'
	 247  			if ch > 9 {
	 248  				return 0, &NumError{fnAtoi, s0, ErrSyntax}
	 249  			}
	 250  			n = n*10 + int(ch)
	 251  		}
	 252  		if s0[0] == '-' {
	 253  			n = -n
	 254  		}
	 255  		return n, nil
	 256  	}
	 257  
	 258  	// Slow path for invalid, big, or underscored integers.
	 259  	i64, err := ParseInt(s, 10, 0)
	 260  	if nerr, ok := err.(*NumError); ok {
	 261  		nerr.Func = fnAtoi
	 262  	}
	 263  	return int(i64), err
	 264  }
	 265  
	 266  // underscoreOK reports whether the underscores in s are allowed.
	 267  // Checking them in this one function lets all the parsers skip over them simply.
	 268  // Underscore must appear only between digits or between a base prefix and a digit.
	 269  func underscoreOK(s string) bool {
	 270  	// saw tracks the last character (class) we saw:
	 271  	// ^ for beginning of number,
	 272  	// 0 for a digit or base prefix,
	 273  	// _ for an underscore,
	 274  	// ! for none of the above.
	 275  	saw := '^'
	 276  	i := 0
	 277  
	 278  	// Optional sign.
	 279  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
	 280  		s = s[1:]
	 281  	}
	 282  
	 283  	// Optional base prefix.
	 284  	hex := false
	 285  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
	 286  		i = 2
	 287  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
	 288  		hex = lower(s[1]) == 'x'
	 289  	}
	 290  
	 291  	// Number proper.
	 292  	for ; i < len(s); i++ {
	 293  		// Digits are always okay.
	 294  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
	 295  			saw = '0'
	 296  			continue
	 297  		}
	 298  		// Underscore must follow digit.
	 299  		if s[i] == '_' {
	 300  			if saw != '0' {
	 301  				return false
	 302  			}
	 303  			saw = '_'
	 304  			continue
	 305  		}
	 306  		// Underscore must also be followed by digit.
	 307  		if saw == '_' {
	 308  			return false
	 309  		}
	 310  		// Saw non-digit, non-underscore.
	 311  		saw = '!'
	 312  	}
	 313  	return saw != '_'
	 314  }
	 315
View as plain text