...

Source file src/fmt/scan.go

Documentation: fmt

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package fmt
		 6  
		 7  import (
		 8  	"errors"
		 9  	"io"
		10  	"math"
		11  	"os"
		12  	"reflect"
		13  	"strconv"
		14  	"sync"
		15  	"unicode/utf8"
		16  )
		17  
		18  // ScanState represents the scanner state passed to custom scanners.
		19  // Scanners may do rune-at-a-time scanning or ask the ScanState
		20  // to discover the next space-delimited token.
		21  type ScanState interface {
		22  	// ReadRune reads the next rune (Unicode code point) from the input.
		23  	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
		24  	// return EOF after returning the first '\n' or when reading beyond
		25  	// the specified width.
		26  	ReadRune() (r rune, size int, err error)
		27  	// UnreadRune causes the next call to ReadRune to return the same rune.
		28  	UnreadRune() error
		29  	// SkipSpace skips space in the input. Newlines are treated appropriately
		30  	// for the operation being performed; see the package documentation
		31  	// for more information.
		32  	SkipSpace()
		33  	// Token skips space in the input if skipSpace is true, then returns the
		34  	// run of Unicode code points c satisfying f(c).	If f is nil,
		35  	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
		36  	// characters. Newlines are treated appropriately for the operation being
		37  	// performed; see the package documentation for more information.
		38  	// The returned slice points to shared data that may be overwritten
		39  	// by the next call to Token, a call to a Scan function using the ScanState
		40  	// as input, or when the calling Scan method returns.
		41  	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
		42  	// Width returns the value of the width option and whether it has been set.
		43  	// The unit is Unicode code points.
		44  	Width() (wid int, ok bool)
		45  	// Because ReadRune is implemented by the interface, Read should never be
		46  	// called by the scanning routines and a valid implementation of
		47  	// ScanState may choose always to return an error from Read.
		48  	Read(buf []byte) (n int, err error)
		49  }
		50  
		51  // Scanner is implemented by any value that has a Scan method, which scans
		52  // the input for the representation of a value and stores the result in the
		53  // receiver, which must be a pointer to be useful. The Scan method is called
		54  // for any argument to Scan, Scanf, or Scanln that implements it.
		55  type Scanner interface {
		56  	Scan(state ScanState, verb rune) error
		57  }
		58  
		59  // Scan scans text read from standard input, storing successive
		60  // space-separated values into successive arguments. Newlines count
		61  // as space. It returns the number of items successfully scanned.
		62  // If that is less than the number of arguments, err will report why.
		63  func Scan(a ...interface{}) (n int, err error) {
		64  	return Fscan(os.Stdin, a...)
		65  }
		66  
		67  // Scanln is similar to Scan, but stops scanning at a newline and
		68  // after the final item there must be a newline or EOF.
		69  func Scanln(a ...interface{}) (n int, err error) {
		70  	return Fscanln(os.Stdin, a...)
		71  }
		72  
		73  // Scanf scans text read from standard input, storing successive
		74  // space-separated values into successive arguments as determined by
		75  // the format. It returns the number of items successfully scanned.
		76  // If that is less than the number of arguments, err will report why.
		77  // Newlines in the input must match newlines in the format.
		78  // The one exception: the verb %c always scans the next rune in the
		79  // input, even if it is a space (or tab etc.) or newline.
		80  func Scanf(format string, a ...interface{}) (n int, err error) {
		81  	return Fscanf(os.Stdin, format, a...)
		82  }
		83  
		84  type stringReader string
		85  
		86  func (r *stringReader) Read(b []byte) (n int, err error) {
		87  	n = copy(b, *r)
		88  	*r = (*r)[n:]
		89  	if n == 0 {
		90  		err = io.EOF
		91  	}
		92  	return
		93  }
		94  
		95  // Sscan scans the argument string, storing successive space-separated
		96  // values into successive arguments. Newlines count as space. It
		97  // returns the number of items successfully scanned. If that is less
		98  // than the number of arguments, err will report why.
		99  func Sscan(str string, a ...interface{}) (n int, err error) {
	 100  	return Fscan((*stringReader)(&str), a...)
	 101  }
	 102  
	 103  // Sscanln is similar to Sscan, but stops scanning at a newline and
	 104  // after the final item there must be a newline or EOF.
	 105  func Sscanln(str string, a ...interface{}) (n int, err error) {
	 106  	return Fscanln((*stringReader)(&str), a...)
	 107  }
	 108  
	 109  // Sscanf scans the argument string, storing successive space-separated
	 110  // values into successive arguments as determined by the format. It
	 111  // returns the number of items successfully parsed.
	 112  // Newlines in the input must match newlines in the format.
	 113  func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
	 114  	return Fscanf((*stringReader)(&str), format, a...)
	 115  }
	 116  
	 117  // Fscan scans text read from r, storing successive space-separated
	 118  // values into successive arguments. Newlines count as space. It
	 119  // returns the number of items successfully scanned. If that is less
	 120  // than the number of arguments, err will report why.
	 121  func Fscan(r io.Reader, a ...interface{}) (n int, err error) {
	 122  	s, old := newScanState(r, true, false)
	 123  	n, err = s.doScan(a)
	 124  	s.free(old)
	 125  	return
	 126  }
	 127  
	 128  // Fscanln is similar to Fscan, but stops scanning at a newline and
	 129  // after the final item there must be a newline or EOF.
	 130  func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
	 131  	s, old := newScanState(r, false, true)
	 132  	n, err = s.doScan(a)
	 133  	s.free(old)
	 134  	return
	 135  }
	 136  
	 137  // Fscanf scans text read from r, storing successive space-separated
	 138  // values into successive arguments as determined by the format. It
	 139  // returns the number of items successfully parsed.
	 140  // Newlines in the input must match newlines in the format.
	 141  func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
	 142  	s, old := newScanState(r, false, false)
	 143  	n, err = s.doScanf(format, a)
	 144  	s.free(old)
	 145  	return
	 146  }
	 147  
	 148  // scanError represents an error generated by the scanning software.
	 149  // It's used as a unique signature to identify such errors when recovering.
	 150  type scanError struct {
	 151  	err error
	 152  }
	 153  
	 154  const eof = -1
	 155  
	 156  // ss is the internal implementation of ScanState.
	 157  type ss struct {
	 158  	rs		io.RuneScanner // where to read input
	 159  	buf	 buffer				 // token accumulator
	 160  	count int						// runes consumed so far.
	 161  	atEOF bool					 // already read EOF
	 162  	ssave
	 163  }
	 164  
	 165  // ssave holds the parts of ss that need to be
	 166  // saved and restored on recursive scans.
	 167  type ssave struct {
	 168  	validSave bool // is or was a part of an actual ss.
	 169  	nlIsEnd	 bool // whether newline terminates scan
	 170  	nlIsSpace bool // whether newline counts as white space
	 171  	argLimit	int	// max value of ss.count for this arg; argLimit <= limit
	 172  	limit		 int	// max value of ss.count.
	 173  	maxWid		int	// width of this arg.
	 174  }
	 175  
	 176  // The Read method is only in ScanState so that ScanState
	 177  // satisfies io.Reader. It will never be called when used as
	 178  // intended, so there is no need to make it actually work.
	 179  func (s *ss) Read(buf []byte) (n int, err error) {
	 180  	return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
	 181  }
	 182  
	 183  func (s *ss) ReadRune() (r rune, size int, err error) {
	 184  	if s.atEOF || s.count >= s.argLimit {
	 185  		err = io.EOF
	 186  		return
	 187  	}
	 188  
	 189  	r, size, err = s.rs.ReadRune()
	 190  	if err == nil {
	 191  		s.count++
	 192  		if s.nlIsEnd && r == '\n' {
	 193  			s.atEOF = true
	 194  		}
	 195  	} else if err == io.EOF {
	 196  		s.atEOF = true
	 197  	}
	 198  	return
	 199  }
	 200  
	 201  func (s *ss) Width() (wid int, ok bool) {
	 202  	if s.maxWid == hugeWid {
	 203  		return 0, false
	 204  	}
	 205  	return s.maxWid, true
	 206  }
	 207  
	 208  // The public method returns an error; this private one panics.
	 209  // If getRune reaches EOF, the return value is EOF (-1).
	 210  func (s *ss) getRune() (r rune) {
	 211  	r, _, err := s.ReadRune()
	 212  	if err != nil {
	 213  		if err == io.EOF {
	 214  			return eof
	 215  		}
	 216  		s.error(err)
	 217  	}
	 218  	return
	 219  }
	 220  
	 221  // mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
	 222  // It is called in cases such as string scanning where an EOF is a
	 223  // syntax error.
	 224  func (s *ss) mustReadRune() (r rune) {
	 225  	r = s.getRune()
	 226  	if r == eof {
	 227  		s.error(io.ErrUnexpectedEOF)
	 228  	}
	 229  	return
	 230  }
	 231  
	 232  func (s *ss) UnreadRune() error {
	 233  	s.rs.UnreadRune()
	 234  	s.atEOF = false
	 235  	s.count--
	 236  	return nil
	 237  }
	 238  
	 239  func (s *ss) error(err error) {
	 240  	panic(scanError{err})
	 241  }
	 242  
	 243  func (s *ss) errorString(err string) {
	 244  	panic(scanError{errors.New(err)})
	 245  }
	 246  
	 247  func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
	 248  	defer func() {
	 249  		if e := recover(); e != nil {
	 250  			if se, ok := e.(scanError); ok {
	 251  				err = se.err
	 252  			} else {
	 253  				panic(e)
	 254  			}
	 255  		}
	 256  	}()
	 257  	if f == nil {
	 258  		f = notSpace
	 259  	}
	 260  	s.buf = s.buf[:0]
	 261  	tok = s.token(skipSpace, f)
	 262  	return
	 263  }
	 264  
	 265  // space is a copy of the unicode.White_Space ranges,
	 266  // to avoid depending on package unicode.
	 267  var space = [][2]uint16{
	 268  	{0x0009, 0x000d},
	 269  	{0x0020, 0x0020},
	 270  	{0x0085, 0x0085},
	 271  	{0x00a0, 0x00a0},
	 272  	{0x1680, 0x1680},
	 273  	{0x2000, 0x200a},
	 274  	{0x2028, 0x2029},
	 275  	{0x202f, 0x202f},
	 276  	{0x205f, 0x205f},
	 277  	{0x3000, 0x3000},
	 278  }
	 279  
	 280  func isSpace(r rune) bool {
	 281  	if r >= 1<<16 {
	 282  		return false
	 283  	}
	 284  	rx := uint16(r)
	 285  	for _, rng := range space {
	 286  		if rx < rng[0] {
	 287  			return false
	 288  		}
	 289  		if rx <= rng[1] {
	 290  			return true
	 291  		}
	 292  	}
	 293  	return false
	 294  }
	 295  
	 296  // notSpace is the default scanning function used in Token.
	 297  func notSpace(r rune) bool {
	 298  	return !isSpace(r)
	 299  }
	 300  
	 301  // readRune is a structure to enable reading UTF-8 encoded code points
	 302  // from an io.Reader. It is used if the Reader given to the scanner does
	 303  // not already implement io.RuneScanner.
	 304  type readRune struct {
	 305  	reader	 io.Reader
	 306  	buf			[utf8.UTFMax]byte // used only inside ReadRune
	 307  	pending	int							 // number of bytes in pendBuf; only >0 for bad UTF-8
	 308  	pendBuf	[utf8.UTFMax]byte // bytes left over
	 309  	peekRune rune							// if >=0 next rune; when <0 is ^(previous Rune)
	 310  }
	 311  
	 312  // readByte returns the next byte from the input, which may be
	 313  // left over from a previous read if the UTF-8 was ill-formed.
	 314  func (r *readRune) readByte() (b byte, err error) {
	 315  	if r.pending > 0 {
	 316  		b = r.pendBuf[0]
	 317  		copy(r.pendBuf[0:], r.pendBuf[1:])
	 318  		r.pending--
	 319  		return
	 320  	}
	 321  	n, err := io.ReadFull(r.reader, r.pendBuf[:1])
	 322  	if n != 1 {
	 323  		return 0, err
	 324  	}
	 325  	return r.pendBuf[0], err
	 326  }
	 327  
	 328  // ReadRune returns the next UTF-8 encoded code point from the
	 329  // io.Reader inside r.
	 330  func (r *readRune) ReadRune() (rr rune, size int, err error) {
	 331  	if r.peekRune >= 0 {
	 332  		rr = r.peekRune
	 333  		r.peekRune = ^r.peekRune
	 334  		size = utf8.RuneLen(rr)
	 335  		return
	 336  	}
	 337  	r.buf[0], err = r.readByte()
	 338  	if err != nil {
	 339  		return
	 340  	}
	 341  	if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
	 342  		rr = rune(r.buf[0])
	 343  		size = 1 // Known to be 1.
	 344  		// Flip the bits of the rune so it's available to UnreadRune.
	 345  		r.peekRune = ^rr
	 346  		return
	 347  	}
	 348  	var n int
	 349  	for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
	 350  		r.buf[n], err = r.readByte()
	 351  		if err != nil {
	 352  			if err == io.EOF {
	 353  				err = nil
	 354  				break
	 355  			}
	 356  			return
	 357  		}
	 358  	}
	 359  	rr, size = utf8.DecodeRune(r.buf[:n])
	 360  	if size < n { // an error, save the bytes for the next read
	 361  		copy(r.pendBuf[r.pending:], r.buf[size:n])
	 362  		r.pending += n - size
	 363  	}
	 364  	// Flip the bits of the rune so it's available to UnreadRune.
	 365  	r.peekRune = ^rr
	 366  	return
	 367  }
	 368  
	 369  func (r *readRune) UnreadRune() error {
	 370  	if r.peekRune >= 0 {
	 371  		return errors.New("fmt: scanning called UnreadRune with no rune available")
	 372  	}
	 373  	// Reverse bit flip of previously read rune to obtain valid >=0 state.
	 374  	r.peekRune = ^r.peekRune
	 375  	return nil
	 376  }
	 377  
	 378  var ssFree = sync.Pool{
	 379  	New: func() interface{} { return new(ss) },
	 380  }
	 381  
	 382  // newScanState allocates a new ss struct or grab a cached one.
	 383  func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
	 384  	s = ssFree.Get().(*ss)
	 385  	if rs, ok := r.(io.RuneScanner); ok {
	 386  		s.rs = rs
	 387  	} else {
	 388  		s.rs = &readRune{reader: r, peekRune: -1}
	 389  	}
	 390  	s.nlIsSpace = nlIsSpace
	 391  	s.nlIsEnd = nlIsEnd
	 392  	s.atEOF = false
	 393  	s.limit = hugeWid
	 394  	s.argLimit = hugeWid
	 395  	s.maxWid = hugeWid
	 396  	s.validSave = true
	 397  	s.count = 0
	 398  	return
	 399  }
	 400  
	 401  // free saves used ss structs in ssFree; avoid an allocation per invocation.
	 402  func (s *ss) free(old ssave) {
	 403  	// If it was used recursively, just restore the old state.
	 404  	if old.validSave {
	 405  		s.ssave = old
	 406  		return
	 407  	}
	 408  	// Don't hold on to ss structs with large buffers.
	 409  	if cap(s.buf) > 1024 {
	 410  		return
	 411  	}
	 412  	s.buf = s.buf[:0]
	 413  	s.rs = nil
	 414  	ssFree.Put(s)
	 415  }
	 416  
	 417  // SkipSpace provides Scan methods the ability to skip space and newline
	 418  // characters in keeping with the current scanning mode set by format strings
	 419  // and Scan/Scanln.
	 420  func (s *ss) SkipSpace() {
	 421  	for {
	 422  		r := s.getRune()
	 423  		if r == eof {
	 424  			return
	 425  		}
	 426  		if r == '\r' && s.peek("\n") {
	 427  			continue
	 428  		}
	 429  		if r == '\n' {
	 430  			if s.nlIsSpace {
	 431  				continue
	 432  			}
	 433  			s.errorString("unexpected newline")
	 434  			return
	 435  		}
	 436  		if !isSpace(r) {
	 437  			s.UnreadRune()
	 438  			break
	 439  		}
	 440  	}
	 441  }
	 442  
	 443  // token returns the next space-delimited string from the input. It
	 444  // skips white space. For Scanln, it stops at newlines. For Scan,
	 445  // newlines are treated as spaces.
	 446  func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
	 447  	if skipSpace {
	 448  		s.SkipSpace()
	 449  	}
	 450  	// read until white space or newline
	 451  	for {
	 452  		r := s.getRune()
	 453  		if r == eof {
	 454  			break
	 455  		}
	 456  		if !f(r) {
	 457  			s.UnreadRune()
	 458  			break
	 459  		}
	 460  		s.buf.writeRune(r)
	 461  	}
	 462  	return s.buf
	 463  }
	 464  
	 465  var complexError = errors.New("syntax error scanning complex number")
	 466  var boolError = errors.New("syntax error scanning boolean")
	 467  
	 468  func indexRune(s string, r rune) int {
	 469  	for i, c := range s {
	 470  		if c == r {
	 471  			return i
	 472  		}
	 473  	}
	 474  	return -1
	 475  }
	 476  
	 477  // consume reads the next rune in the input and reports whether it is in the ok string.
	 478  // If accept is true, it puts the character into the input token.
	 479  func (s *ss) consume(ok string, accept bool) bool {
	 480  	r := s.getRune()
	 481  	if r == eof {
	 482  		return false
	 483  	}
	 484  	if indexRune(ok, r) >= 0 {
	 485  		if accept {
	 486  			s.buf.writeRune(r)
	 487  		}
	 488  		return true
	 489  	}
	 490  	if r != eof && accept {
	 491  		s.UnreadRune()
	 492  	}
	 493  	return false
	 494  }
	 495  
	 496  // peek reports whether the next character is in the ok string, without consuming it.
	 497  func (s *ss) peek(ok string) bool {
	 498  	r := s.getRune()
	 499  	if r != eof {
	 500  		s.UnreadRune()
	 501  	}
	 502  	return indexRune(ok, r) >= 0
	 503  }
	 504  
	 505  func (s *ss) notEOF() {
	 506  	// Guarantee there is data to be read.
	 507  	if r := s.getRune(); r == eof {
	 508  		panic(io.EOF)
	 509  	}
	 510  	s.UnreadRune()
	 511  }
	 512  
	 513  // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
	 514  // buffer and returns true. Otherwise it return false.
	 515  func (s *ss) accept(ok string) bool {
	 516  	return s.consume(ok, true)
	 517  }
	 518  
	 519  // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
	 520  func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
	 521  	for _, v := range okVerbs {
	 522  		if v == verb {
	 523  			return true
	 524  		}
	 525  	}
	 526  	s.errorString("bad verb '%" + string(verb) + "' for " + typ)
	 527  	return false
	 528  }
	 529  
	 530  // scanBool returns the value of the boolean represented by the next token.
	 531  func (s *ss) scanBool(verb rune) bool {
	 532  	s.SkipSpace()
	 533  	s.notEOF()
	 534  	if !s.okVerb(verb, "tv", "boolean") {
	 535  		return false
	 536  	}
	 537  	// Syntax-checking a boolean is annoying. We're not fastidious about case.
	 538  	switch s.getRune() {
	 539  	case '0':
	 540  		return false
	 541  	case '1':
	 542  		return true
	 543  	case 't', 'T':
	 544  		if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
	 545  			s.error(boolError)
	 546  		}
	 547  		return true
	 548  	case 'f', 'F':
	 549  		if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
	 550  			s.error(boolError)
	 551  		}
	 552  		return false
	 553  	}
	 554  	return false
	 555  }
	 556  
	 557  // Numerical elements
	 558  const (
	 559  	binaryDigits			= "01"
	 560  	octalDigits			 = "01234567"
	 561  	decimalDigits		 = "0123456789"
	 562  	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	 563  	sign							= "+-"
	 564  	period						= "."
	 565  	exponent					= "eEpP"
	 566  )
	 567  
	 568  // getBase returns the numeric base represented by the verb and its digit string.
	 569  func (s *ss) getBase(verb rune) (base int, digits string) {
	 570  	s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
	 571  	base = 10
	 572  	digits = decimalDigits
	 573  	switch verb {
	 574  	case 'b':
	 575  		base = 2
	 576  		digits = binaryDigits
	 577  	case 'o':
	 578  		base = 8
	 579  		digits = octalDigits
	 580  	case 'x', 'X', 'U':
	 581  		base = 16
	 582  		digits = hexadecimalDigits
	 583  	}
	 584  	return
	 585  }
	 586  
	 587  // scanNumber returns the numerical string with specified digits starting here.
	 588  func (s *ss) scanNumber(digits string, haveDigits bool) string {
	 589  	if !haveDigits {
	 590  		s.notEOF()
	 591  		if !s.accept(digits) {
	 592  			s.errorString("expected integer")
	 593  		}
	 594  	}
	 595  	for s.accept(digits) {
	 596  	}
	 597  	return string(s.buf)
	 598  }
	 599  
	 600  // scanRune returns the next rune value in the input.
	 601  func (s *ss) scanRune(bitSize int) int64 {
	 602  	s.notEOF()
	 603  	r := s.getRune()
	 604  	n := uint(bitSize)
	 605  	x := (int64(r) << (64 - n)) >> (64 - n)
	 606  	if x != int64(r) {
	 607  		s.errorString("overflow on character value " + string(r))
	 608  	}
	 609  	return int64(r)
	 610  }
	 611  
	 612  // scanBasePrefix reports whether the integer begins with a base prefix
	 613  // and returns the base, digit string, and whether a zero was found.
	 614  // It is called only if the verb is %v.
	 615  func (s *ss) scanBasePrefix() (base int, digits string, zeroFound bool) {
	 616  	if !s.peek("0") {
	 617  		return 0, decimalDigits + "_", false
	 618  	}
	 619  	s.accept("0")
	 620  	// Special cases for 0, 0b, 0o, 0x.
	 621  	switch {
	 622  	case s.peek("bB"):
	 623  		s.consume("bB", true)
	 624  		return 0, binaryDigits + "_", true
	 625  	case s.peek("oO"):
	 626  		s.consume("oO", true)
	 627  		return 0, octalDigits + "_", true
	 628  	case s.peek("xX"):
	 629  		s.consume("xX", true)
	 630  		return 0, hexadecimalDigits + "_", true
	 631  	default:
	 632  		return 0, octalDigits + "_", true
	 633  	}
	 634  }
	 635  
	 636  // scanInt returns the value of the integer represented by the next
	 637  // token, checking for overflow. Any error is stored in s.err.
	 638  func (s *ss) scanInt(verb rune, bitSize int) int64 {
	 639  	if verb == 'c' {
	 640  		return s.scanRune(bitSize)
	 641  	}
	 642  	s.SkipSpace()
	 643  	s.notEOF()
	 644  	base, digits := s.getBase(verb)
	 645  	haveDigits := false
	 646  	if verb == 'U' {
	 647  		if !s.consume("U", false) || !s.consume("+", false) {
	 648  			s.errorString("bad unicode format ")
	 649  		}
	 650  	} else {
	 651  		s.accept(sign) // If there's a sign, it will be left in the token buffer.
	 652  		if verb == 'v' {
	 653  			base, digits, haveDigits = s.scanBasePrefix()
	 654  		}
	 655  	}
	 656  	tok := s.scanNumber(digits, haveDigits)
	 657  	i, err := strconv.ParseInt(tok, base, 64)
	 658  	if err != nil {
	 659  		s.error(err)
	 660  	}
	 661  	n := uint(bitSize)
	 662  	x := (i << (64 - n)) >> (64 - n)
	 663  	if x != i {
	 664  		s.errorString("integer overflow on token " + tok)
	 665  	}
	 666  	return i
	 667  }
	 668  
	 669  // scanUint returns the value of the unsigned integer represented
	 670  // by the next token, checking for overflow. Any error is stored in s.err.
	 671  func (s *ss) scanUint(verb rune, bitSize int) uint64 {
	 672  	if verb == 'c' {
	 673  		return uint64(s.scanRune(bitSize))
	 674  	}
	 675  	s.SkipSpace()
	 676  	s.notEOF()
	 677  	base, digits := s.getBase(verb)
	 678  	haveDigits := false
	 679  	if verb == 'U' {
	 680  		if !s.consume("U", false) || !s.consume("+", false) {
	 681  			s.errorString("bad unicode format ")
	 682  		}
	 683  	} else if verb == 'v' {
	 684  		base, digits, haveDigits = s.scanBasePrefix()
	 685  	}
	 686  	tok := s.scanNumber(digits, haveDigits)
	 687  	i, err := strconv.ParseUint(tok, base, 64)
	 688  	if err != nil {
	 689  		s.error(err)
	 690  	}
	 691  	n := uint(bitSize)
	 692  	x := (i << (64 - n)) >> (64 - n)
	 693  	if x != i {
	 694  		s.errorString("unsigned integer overflow on token " + tok)
	 695  	}
	 696  	return i
	 697  }
	 698  
	 699  // floatToken returns the floating-point number starting here, no longer than swid
	 700  // if the width is specified. It's not rigorous about syntax because it doesn't check that
	 701  // we have at least some digits, but Atof will do that.
	 702  func (s *ss) floatToken() string {
	 703  	s.buf = s.buf[:0]
	 704  	// NaN?
	 705  	if s.accept("nN") && s.accept("aA") && s.accept("nN") {
	 706  		return string(s.buf)
	 707  	}
	 708  	// leading sign?
	 709  	s.accept(sign)
	 710  	// Inf?
	 711  	if s.accept("iI") && s.accept("nN") && s.accept("fF") {
	 712  		return string(s.buf)
	 713  	}
	 714  	digits := decimalDigits + "_"
	 715  	exp := exponent
	 716  	if s.accept("0") && s.accept("xX") {
	 717  		digits = hexadecimalDigits + "_"
	 718  		exp = "pP"
	 719  	}
	 720  	// digits?
	 721  	for s.accept(digits) {
	 722  	}
	 723  	// decimal point?
	 724  	if s.accept(period) {
	 725  		// fraction?
	 726  		for s.accept(digits) {
	 727  		}
	 728  	}
	 729  	// exponent?
	 730  	if s.accept(exp) {
	 731  		// leading sign?
	 732  		s.accept(sign)
	 733  		// digits?
	 734  		for s.accept(decimalDigits + "_") {
	 735  		}
	 736  	}
	 737  	return string(s.buf)
	 738  }
	 739  
	 740  // complexTokens returns the real and imaginary parts of the complex number starting here.
	 741  // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
	 742  // number and there are no spaces within.
	 743  func (s *ss) complexTokens() (real, imag string) {
	 744  	// TODO: accept N and Ni independently?
	 745  	parens := s.accept("(")
	 746  	real = s.floatToken()
	 747  	s.buf = s.buf[:0]
	 748  	// Must now have a sign.
	 749  	if !s.accept("+-") {
	 750  		s.error(complexError)
	 751  	}
	 752  	// Sign is now in buffer
	 753  	imagSign := string(s.buf)
	 754  	imag = s.floatToken()
	 755  	if !s.accept("i") {
	 756  		s.error(complexError)
	 757  	}
	 758  	if parens && !s.accept(")") {
	 759  		s.error(complexError)
	 760  	}
	 761  	return real, imagSign + imag
	 762  }
	 763  
	 764  func hasX(s string) bool {
	 765  	for i := 0; i < len(s); i++ {
	 766  		if s[i] == 'x' || s[i] == 'X' {
	 767  			return true
	 768  		}
	 769  	}
	 770  	return false
	 771  }
	 772  
	 773  // convertFloat converts the string to a float64value.
	 774  func (s *ss) convertFloat(str string, n int) float64 {
	 775  	// strconv.ParseFloat will handle "+0x1.fp+2",
	 776  	// but we have to implement our non-standard
	 777  	// decimal+binary exponent mix (1.2p4) ourselves.
	 778  	if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
	 779  		// Atof doesn't handle power-of-2 exponents,
	 780  		// but they're easy to evaluate.
	 781  		f, err := strconv.ParseFloat(str[:p], n)
	 782  		if err != nil {
	 783  			// Put full string into error.
	 784  			if e, ok := err.(*strconv.NumError); ok {
	 785  				e.Num = str
	 786  			}
	 787  			s.error(err)
	 788  		}
	 789  		m, err := strconv.Atoi(str[p+1:])
	 790  		if err != nil {
	 791  			// Put full string into error.
	 792  			if e, ok := err.(*strconv.NumError); ok {
	 793  				e.Num = str
	 794  			}
	 795  			s.error(err)
	 796  		}
	 797  		return math.Ldexp(f, m)
	 798  	}
	 799  	f, err := strconv.ParseFloat(str, n)
	 800  	if err != nil {
	 801  		s.error(err)
	 802  	}
	 803  	return f
	 804  }
	 805  
	 806  // convertComplex converts the next token to a complex128 value.
	 807  // The atof argument is a type-specific reader for the underlying type.
	 808  // If we're reading complex64, atof will parse float32s and convert them
	 809  // to float64's to avoid reproducing this code for each complex type.
	 810  func (s *ss) scanComplex(verb rune, n int) complex128 {
	 811  	if !s.okVerb(verb, floatVerbs, "complex") {
	 812  		return 0
	 813  	}
	 814  	s.SkipSpace()
	 815  	s.notEOF()
	 816  	sreal, simag := s.complexTokens()
	 817  	real := s.convertFloat(sreal, n/2)
	 818  	imag := s.convertFloat(simag, n/2)
	 819  	return complex(real, imag)
	 820  }
	 821  
	 822  // convertString returns the string represented by the next input characters.
	 823  // The format of the input is determined by the verb.
	 824  func (s *ss) convertString(verb rune) (str string) {
	 825  	if !s.okVerb(verb, "svqxX", "string") {
	 826  		return ""
	 827  	}
	 828  	s.SkipSpace()
	 829  	s.notEOF()
	 830  	switch verb {
	 831  	case 'q':
	 832  		str = s.quotedString()
	 833  	case 'x', 'X':
	 834  		str = s.hexString()
	 835  	default:
	 836  		str = string(s.token(true, notSpace)) // %s and %v just return the next word
	 837  	}
	 838  	return
	 839  }
	 840  
	 841  // quotedString returns the double- or back-quoted string represented by the next input characters.
	 842  func (s *ss) quotedString() string {
	 843  	s.notEOF()
	 844  	quote := s.getRune()
	 845  	switch quote {
	 846  	case '`':
	 847  		// Back-quoted: Anything goes until EOF or back quote.
	 848  		for {
	 849  			r := s.mustReadRune()
	 850  			if r == quote {
	 851  				break
	 852  			}
	 853  			s.buf.writeRune(r)
	 854  		}
	 855  		return string(s.buf)
	 856  	case '"':
	 857  		// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
	 858  		s.buf.writeByte('"')
	 859  		for {
	 860  			r := s.mustReadRune()
	 861  			s.buf.writeRune(r)
	 862  			if r == '\\' {
	 863  				// In a legal backslash escape, no matter how long, only the character
	 864  				// immediately after the escape can itself be a backslash or quote.
	 865  				// Thus we only need to protect the first character after the backslash.
	 866  				s.buf.writeRune(s.mustReadRune())
	 867  			} else if r == '"' {
	 868  				break
	 869  			}
	 870  		}
	 871  		result, err := strconv.Unquote(string(s.buf))
	 872  		if err != nil {
	 873  			s.error(err)
	 874  		}
	 875  		return result
	 876  	default:
	 877  		s.errorString("expected quoted string")
	 878  	}
	 879  	return ""
	 880  }
	 881  
	 882  // hexDigit returns the value of the hexadecimal digit.
	 883  func hexDigit(d rune) (int, bool) {
	 884  	digit := int(d)
	 885  	switch digit {
	 886  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
	 887  		return digit - '0', true
	 888  	case 'a', 'b', 'c', 'd', 'e', 'f':
	 889  		return 10 + digit - 'a', true
	 890  	case 'A', 'B', 'C', 'D', 'E', 'F':
	 891  		return 10 + digit - 'A', true
	 892  	}
	 893  	return -1, false
	 894  }
	 895  
	 896  // hexByte returns the next hex-encoded (two-character) byte from the input.
	 897  // It returns ok==false if the next bytes in the input do not encode a hex byte.
	 898  // If the first byte is hex and the second is not, processing stops.
	 899  func (s *ss) hexByte() (b byte, ok bool) {
	 900  	rune1 := s.getRune()
	 901  	if rune1 == eof {
	 902  		return
	 903  	}
	 904  	value1, ok := hexDigit(rune1)
	 905  	if !ok {
	 906  		s.UnreadRune()
	 907  		return
	 908  	}
	 909  	value2, ok := hexDigit(s.mustReadRune())
	 910  	if !ok {
	 911  		s.errorString("illegal hex digit")
	 912  		return
	 913  	}
	 914  	return byte(value1<<4 | value2), true
	 915  }
	 916  
	 917  // hexString returns the space-delimited hexpair-encoded string.
	 918  func (s *ss) hexString() string {
	 919  	s.notEOF()
	 920  	for {
	 921  		b, ok := s.hexByte()
	 922  		if !ok {
	 923  			break
	 924  		}
	 925  		s.buf.writeByte(b)
	 926  	}
	 927  	if len(s.buf) == 0 {
	 928  		s.errorString("no hex data for %x string")
	 929  		return ""
	 930  	}
	 931  	return string(s.buf)
	 932  }
	 933  
	 934  const (
	 935  	floatVerbs = "beEfFgGv"
	 936  
	 937  	hugeWid = 1 << 30
	 938  
	 939  	intBits		 = 32 << (^uint(0) >> 63)
	 940  	uintptrBits = 32 << (^uintptr(0) >> 63)
	 941  )
	 942  
	 943  // scanPercent scans a literal percent character.
	 944  func (s *ss) scanPercent() {
	 945  	s.SkipSpace()
	 946  	s.notEOF()
	 947  	if !s.accept("%") {
	 948  		s.errorString("missing literal %")
	 949  	}
	 950  }
	 951  
	 952  // scanOne scans a single value, deriving the scanner from the type of the argument.
	 953  func (s *ss) scanOne(verb rune, arg interface{}) {
	 954  	s.buf = s.buf[:0]
	 955  	var err error
	 956  	// If the parameter has its own Scan method, use that.
	 957  	if v, ok := arg.(Scanner); ok {
	 958  		err = v.Scan(s, verb)
	 959  		if err != nil {
	 960  			if err == io.EOF {
	 961  				err = io.ErrUnexpectedEOF
	 962  			}
	 963  			s.error(err)
	 964  		}
	 965  		return
	 966  	}
	 967  
	 968  	switch v := arg.(type) {
	 969  	case *bool:
	 970  		*v = s.scanBool(verb)
	 971  	case *complex64:
	 972  		*v = complex64(s.scanComplex(verb, 64))
	 973  	case *complex128:
	 974  		*v = s.scanComplex(verb, 128)
	 975  	case *int:
	 976  		*v = int(s.scanInt(verb, intBits))
	 977  	case *int8:
	 978  		*v = int8(s.scanInt(verb, 8))
	 979  	case *int16:
	 980  		*v = int16(s.scanInt(verb, 16))
	 981  	case *int32:
	 982  		*v = int32(s.scanInt(verb, 32))
	 983  	case *int64:
	 984  		*v = s.scanInt(verb, 64)
	 985  	case *uint:
	 986  		*v = uint(s.scanUint(verb, intBits))
	 987  	case *uint8:
	 988  		*v = uint8(s.scanUint(verb, 8))
	 989  	case *uint16:
	 990  		*v = uint16(s.scanUint(verb, 16))
	 991  	case *uint32:
	 992  		*v = uint32(s.scanUint(verb, 32))
	 993  	case *uint64:
	 994  		*v = s.scanUint(verb, 64)
	 995  	case *uintptr:
	 996  		*v = uintptr(s.scanUint(verb, uintptrBits))
	 997  	// Floats are tricky because you want to scan in the precision of the result, not
	 998  	// scan in high precision and convert, in order to preserve the correct error condition.
	 999  	case *float32:
	1000  		if s.okVerb(verb, floatVerbs, "float32") {
	1001  			s.SkipSpace()
	1002  			s.notEOF()
	1003  			*v = float32(s.convertFloat(s.floatToken(), 32))
	1004  		}
	1005  	case *float64:
	1006  		if s.okVerb(verb, floatVerbs, "float64") {
	1007  			s.SkipSpace()
	1008  			s.notEOF()
	1009  			*v = s.convertFloat(s.floatToken(), 64)
	1010  		}
	1011  	case *string:
	1012  		*v = s.convertString(verb)
	1013  	case *[]byte:
	1014  		// We scan to string and convert so we get a copy of the data.
	1015  		// If we scanned to bytes, the slice would point at the buffer.
	1016  		*v = []byte(s.convertString(verb))
	1017  	default:
	1018  		val := reflect.ValueOf(v)
	1019  		ptr := val
	1020  		if ptr.Kind() != reflect.Ptr {
	1021  			s.errorString("type not a pointer: " + val.Type().String())
	1022  			return
	1023  		}
	1024  		switch v := ptr.Elem(); v.Kind() {
	1025  		case reflect.Bool:
	1026  			v.SetBool(s.scanBool(verb))
	1027  		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
	1028  			v.SetInt(s.scanInt(verb, v.Type().Bits()))
	1029  		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
	1030  			v.SetUint(s.scanUint(verb, v.Type().Bits()))
	1031  		case reflect.String:
	1032  			v.SetString(s.convertString(verb))
	1033  		case reflect.Slice:
	1034  			// For now, can only handle (renamed) []byte.
	1035  			typ := v.Type()
	1036  			if typ.Elem().Kind() != reflect.Uint8 {
	1037  				s.errorString("can't scan type: " + val.Type().String())
	1038  			}
	1039  			str := s.convertString(verb)
	1040  			v.Set(reflect.MakeSlice(typ, len(str), len(str)))
	1041  			for i := 0; i < len(str); i++ {
	1042  				v.Index(i).SetUint(uint64(str[i]))
	1043  			}
	1044  		case reflect.Float32, reflect.Float64:
	1045  			s.SkipSpace()
	1046  			s.notEOF()
	1047  			v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
	1048  		case reflect.Complex64, reflect.Complex128:
	1049  			v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
	1050  		default:
	1051  			s.errorString("can't scan type: " + val.Type().String())
	1052  		}
	1053  	}
	1054  }
	1055  
	1056  // errorHandler turns local panics into error returns.
	1057  func errorHandler(errp *error) {
	1058  	if e := recover(); e != nil {
	1059  		if se, ok := e.(scanError); ok { // catch local error
	1060  			*errp = se.err
	1061  		} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
	1062  			*errp = eof
	1063  		} else {
	1064  			panic(e)
	1065  		}
	1066  	}
	1067  }
	1068  
	1069  // doScan does the real work for scanning without a format string.
	1070  func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
	1071  	defer errorHandler(&err)
	1072  	for _, arg := range a {
	1073  		s.scanOne('v', arg)
	1074  		numProcessed++
	1075  	}
	1076  	// Check for newline (or EOF) if required (Scanln etc.).
	1077  	if s.nlIsEnd {
	1078  		for {
	1079  			r := s.getRune()
	1080  			if r == '\n' || r == eof {
	1081  				break
	1082  			}
	1083  			if !isSpace(r) {
	1084  				s.errorString("expected newline")
	1085  				break
	1086  			}
	1087  		}
	1088  	}
	1089  	return
	1090  }
	1091  
	1092  // advance determines whether the next characters in the input match
	1093  // those of the format. It returns the number of bytes (sic) consumed
	1094  // in the format. All runs of space characters in either input or
	1095  // format behave as a single space. Newlines are special, though:
	1096  // newlines in the format must match those in the input and vice versa.
	1097  // This routine also handles the %% case. If the return value is zero,
	1098  // either format starts with a % (with no following %) or the input
	1099  // is empty. If it is negative, the input did not match the string.
	1100  func (s *ss) advance(format string) (i int) {
	1101  	for i < len(format) {
	1102  		fmtc, w := utf8.DecodeRuneInString(format[i:])
	1103  
	1104  		// Space processing.
	1105  		// In the rest of this comment "space" means spaces other than newline.
	1106  		// Newline in the format matches input of zero or more spaces and then newline or end-of-input.
	1107  		// Spaces in the format before the newline are collapsed into the newline.
	1108  		// Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
	1109  		// Other spaces in the format match input of one or more spaces or end-of-input.
	1110  		if isSpace(fmtc) {
	1111  			newlines := 0
	1112  			trailingSpace := false
	1113  			for isSpace(fmtc) && i < len(format) {
	1114  				if fmtc == '\n' {
	1115  					newlines++
	1116  					trailingSpace = false
	1117  				} else {
	1118  					trailingSpace = true
	1119  				}
	1120  				i += w
	1121  				fmtc, w = utf8.DecodeRuneInString(format[i:])
	1122  			}
	1123  			for j := 0; j < newlines; j++ {
	1124  				inputc := s.getRune()
	1125  				for isSpace(inputc) && inputc != '\n' {
	1126  					inputc = s.getRune()
	1127  				}
	1128  				if inputc != '\n' && inputc != eof {
	1129  					s.errorString("newline in format does not match input")
	1130  				}
	1131  			}
	1132  			if trailingSpace {
	1133  				inputc := s.getRune()
	1134  				if newlines == 0 {
	1135  					// If the trailing space stood alone (did not follow a newline),
	1136  					// it must find at least one space to consume.
	1137  					if !isSpace(inputc) && inputc != eof {
	1138  						s.errorString("expected space in input to match format")
	1139  					}
	1140  					if inputc == '\n' {
	1141  						s.errorString("newline in input does not match format")
	1142  					}
	1143  				}
	1144  				for isSpace(inputc) && inputc != '\n' {
	1145  					inputc = s.getRune()
	1146  				}
	1147  				if inputc != eof {
	1148  					s.UnreadRune()
	1149  				}
	1150  			}
	1151  			continue
	1152  		}
	1153  
	1154  		// Verbs.
	1155  		if fmtc == '%' {
	1156  			// % at end of string is an error.
	1157  			if i+w == len(format) {
	1158  				s.errorString("missing verb: % at end of format string")
	1159  			}
	1160  			// %% acts like a real percent
	1161  			nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
	1162  			if nextc != '%' {
	1163  				return
	1164  			}
	1165  			i += w // skip the first %
	1166  		}
	1167  
	1168  		// Literals.
	1169  		inputc := s.mustReadRune()
	1170  		if fmtc != inputc {
	1171  			s.UnreadRune()
	1172  			return -1
	1173  		}
	1174  		i += w
	1175  	}
	1176  	return
	1177  }
	1178  
	1179  // doScanf does the real work when scanning with a format string.
	1180  // At the moment, it handles only pointers to basic types.
	1181  func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
	1182  	defer errorHandler(&err)
	1183  	end := len(format) - 1
	1184  	// We process one item per non-trivial format
	1185  	for i := 0; i <= end; {
	1186  		w := s.advance(format[i:])
	1187  		if w > 0 {
	1188  			i += w
	1189  			continue
	1190  		}
	1191  		// Either we failed to advance, we have a percent character, or we ran out of input.
	1192  		if format[i] != '%' {
	1193  			// Can't advance format. Why not?
	1194  			if w < 0 {
	1195  				s.errorString("input does not match format")
	1196  			}
	1197  			// Otherwise at EOF; "too many operands" error handled below
	1198  			break
	1199  		}
	1200  		i++ // % is one byte
	1201  
	1202  		// do we have 20 (width)?
	1203  		var widPresent bool
	1204  		s.maxWid, widPresent, i = parsenum(format, i, end)
	1205  		if !widPresent {
	1206  			s.maxWid = hugeWid
	1207  		}
	1208  
	1209  		c, w := utf8.DecodeRuneInString(format[i:])
	1210  		i += w
	1211  
	1212  		if c != 'c' {
	1213  			s.SkipSpace()
	1214  		}
	1215  		if c == '%' {
	1216  			s.scanPercent()
	1217  			continue // Do not consume an argument.
	1218  		}
	1219  		s.argLimit = s.limit
	1220  		if f := s.count + s.maxWid; f < s.argLimit {
	1221  			s.argLimit = f
	1222  		}
	1223  
	1224  		if numProcessed >= len(a) { // out of operands
	1225  			s.errorString("too few operands for format '%" + format[i-w:] + "'")
	1226  			break
	1227  		}
	1228  		arg := a[numProcessed]
	1229  
	1230  		s.scanOne(c, arg)
	1231  		numProcessed++
	1232  		s.argLimit = s.limit
	1233  	}
	1234  	if numProcessed < len(a) {
	1235  		s.errorString("too many operands")
	1236  	}
	1237  	return
	1238  }
	1239  

View as plain text