...

Source file src/text/scanner/scanner_test.go

Documentation: text/scanner

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package scanner
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"fmt"
		10  	"io"
		11  	"strings"
		12  	"testing"
		13  	"unicode/utf8"
		14  )
		15  
		// A StringReader delivers its data one string segment at a time via Read.
		//
		// Each Read call hands out at most one segment. A segment that does not fit
		// into the destination buffer is continued by the following Read calls rather
		// than being cut off. An empty segment produces a (0, nil) read; that is
		// deliberate, so tests can exercise readers that make no progress on a call.
		type StringReader struct {
			data []string // segments to hand out, in order
			step int      // index of the segment the next Read starts with
			off  int      // bytes of data[step] already delivered by earlier Reads
		}

		// Read copies the current segment (or what is left of it) into p and returns
		// the number of bytes copied. It advances to the next segment only once the
		// current one has been delivered completely, and returns (0, io.EOF) after
		// the last segment.
		func (r *StringReader) Read(p []byte) (n int, err error) {
			if r.step >= len(r.data) {
				return 0, io.EOF
			}
			rest := r.data[r.step][r.off:]
			n = copy(p, rest)
			if n < len(rest) {
				// p was too small: remember how far we got and stay on this segment.
				r.off += n
				return n, nil
			}
			r.step++
			r.off = 0
			return n, nil
		}
		32  
		33  func readRuneSegments(t *testing.T, segments []string) {
		34  	got := ""
		35  	want := strings.Join(segments, "")
		36  	s := new(Scanner).Init(&StringReader{data: segments})
		37  	for {
		38  		ch := s.Next()
		39  		if ch == EOF {
		40  			break
		41  		}
		42  		got += string(ch)
		43  	}
		44  	if got != want {
		45  		t.Errorf("segments=%v got=%s want=%s", segments, got, want)
		46  	}
		47  }
		48  
		// segmentList holds the inputs for TestNext. Each entry is a list of string
		// segments that readRuneSegments feeds to a Scanner one Read at a time; the
		// scanner must return the same runes as the concatenation of the segments.
		49  var segmentList = [][]string{
			// no segments at all
		50  	{},
			// a single empty segment
		51  	{""},
			// multi-byte runes, segment boundaries on rune boundaries
		52  	{"日", "本語"},
			// the same three runes written as \u escapes, one rune per segment
		53  	{"\u65e5", "\u672c", "\u8a9e"},
			// \U escapes, with a one-byte segment in between
		54  	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
			// the UTF-8 bytes of 日本語 split in the middle of encodings, so a rune
			// starts in one segment and ends in the next
		55  	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
			// plain ASCII
		56  	{"Hello", ", ", "World", "!"},
			// plain ASCII with an empty segment in the middle
		57  	{"Hello", ", ", "", "World", "!"},
		58  }
		59  
		60  func TestNext(t *testing.T) {
		61  	for _, s := range segmentList {
		62  		readRuneSegments(t, s)
		63  	}
		64  }
		65  
		// token pairs an expected scan result with the source text that should
		// produce it. tokenList builds its entries with positional literals, so the
		// field order is significant.
		66  type token struct {
			// tok is the expected token: a token class such as Ident or Int, or the
			// character itself for the single-character entries.
		67  	tok	rune
			// text is the source text, compared against Scanner.TokenText.
		68  	text string
		69  }
		70  
		// f100 is a long run of 'f' characters (100 of them, going by the name; the
		// count is not checked anywhere in this file). tokenList uses it to build
		// long comments, identifiers, hexadecimal literals and strings.
		71  var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
		72  
		// tokenList is the shared corpus of (expected token, source text) pairs.
		// makeSource formats every text, in order, into a single source, and
		// testScan and TestPosition walk the list in the same order while tracking
		// line numbers and offsets, so both the order of the entries and the
		// newlines embedded in their texts matter.
		//
		// The Comment entries whose text names a group ("// identifiers", ...) are
		// ordinary entries too: they are written to the source and scanned like
		// every other token.
		73  var tokenList = []token{
		74  	{Comment, "// line comments"},
		75  	{Comment, "//"},
		76  	{Comment, "////"},
		77  	{Comment, "// comment"},
		78  	{Comment, "// /* comment */"},
		79  	{Comment, "// // comment //"},
		80  	{Comment, "//" + f100},
		81  
		82  	{Comment, "// general comments"},
		83  	{Comment, "/**/"},
		84  	{Comment, "/***/"},
		85  	{Comment, "/* comment */"},
		86  	{Comment, "/* // comment */"},
		87  	{Comment, "/* /* comment */"},
		88  	{Comment, "/*\n comment\n*/"},
		89  	{Comment, "/*" + f100 + "*/"},
		90  
		91  	{Comment, "// identifiers"},
		92  	{Ident, "a"},
		93  	{Ident, "a0"},
		94  	{Ident, "foobar"},
		95  	{Ident, "abc123"},
		96  	{Ident, "LGTM"},
		97  	{Ident, "_"},
		98  	{Ident, "_abc123"},
		99  	{Ident, "abc123_"},
	 100  	{Ident, "_abc_123_"},
	 101  	{Ident, "_äöü"},
	 102  	{Ident, "_本"},
	 103  	{Ident, "äöü"},
	 104  	{Ident, "本"},
	 105  	{Ident, "a۰۱۸"},
	 106  	{Ident, "foo६४"},
	 107  	{Ident, "bar9876"},
	 108  	{Ident, f100},
	 109  
	 110  	{Comment, "// decimal ints"},
	 111  	{Int, "0"},
	 112  	{Int, "1"},
	 113  	{Int, "9"},
	 114  	{Int, "42"},
	 115  	{Int, "1234567890"},
	 116  
	 117  	{Comment, "// octal ints"},
	 118  	{Int, "00"},
	 119  	{Int, "01"},
	 120  	{Int, "07"},
	 121  	{Int, "042"},
	 122  	{Int, "01234567"},
	 123  
	 124  	{Comment, "// hexadecimal ints"},
	 125  	{Int, "0x0"},
	 126  	{Int, "0x1"},
	 127  	{Int, "0xf"},
	 128  	{Int, "0x42"},
	 129  	{Int, "0x123456789abcDEF"},
	 130  	{Int, "0x" + f100},
	 131  	{Int, "0X0"},
	 132  	{Int, "0X1"},
	 133  	{Int, "0XF"},
	 134  	{Int, "0X42"},
	 135  	{Int, "0X123456789abcDEF"},
	 136  	{Int, "0X" + f100},
	 137  
	 138  	{Comment, "// floats"},
	 139  	{Float, "0."},
	 140  	{Float, "1."},
	 141  	{Float, "42."},
	 142  	{Float, "01234567890."},
	 143  	{Float, ".0"},
	 144  	{Float, ".1"},
	 145  	{Float, ".42"},
	 146  	{Float, ".0123456789"},
	 147  	{Float, "0.0"},
	 148  	{Float, "1.0"},
	 149  	{Float, "42.0"},
	 150  	{Float, "01234567890.0"},
	 151  	{Float, "0e0"},
	 152  	{Float, "1e0"},
	 153  	{Float, "42e0"},
	 154  	{Float, "01234567890e0"},
	 155  	{Float, "0E0"},
	 156  	{Float, "1E0"},
	 157  	{Float, "42E0"},
	 158  	{Float, "01234567890E0"},
	 159  	{Float, "0e+10"},
	 160  	{Float, "1e-10"},
	 161  	{Float, "42e+10"},
	 162  	{Float, "01234567890e-10"},
	 163  	{Float, "0E+10"},
	 164  	{Float, "1E-10"},
	 165  	{Float, "42E+10"},
	 166  	{Float, "01234567890E-10"},
	 167  
	 168  	{Comment, "// chars"},
	 169  	{Char, `' '`},
	 170  	{Char, `'a'`},
	 171  	{Char, `'本'`},
	 172  	{Char, `'\a'`},
	 173  	{Char, `'\b'`},
	 174  	{Char, `'\f'`},
	 175  	{Char, `'\n'`},
	 176  	{Char, `'\r'`},
	 177  	{Char, `'\t'`},
	 178  	{Char, `'\v'`},
	 179  	{Char, `'\''`},
	 180  	{Char, `'\000'`},
	 181  	{Char, `'\777'`},
	 182  	{Char, `'\x00'`},
	 183  	{Char, `'\xff'`},
	 184  	{Char, `'\u0000'`},
	 185  	{Char, `'\ufA16'`},
	 186  	{Char, `'\U00000000'`},
	 187  	{Char, `'\U0000ffAB'`},
	 188  
	 189  	{Comment, "// strings"},
	 190  	{String, `" "`},
	 191  	{String, `"a"`},
	 192  	{String, `"本"`},
	 193  	{String, `"\a"`},
	 194  	{String, `"\b"`},
	 195  	{String, `"\f"`},
	 196  	{String, `"\n"`},
	 197  	{String, `"\r"`},
	 198  	{String, `"\t"`},
	 199  	{String, `"\v"`},
	 200  	{String, `"\""`},
	 201  	{String, `"\000"`},
	 202  	{String, `"\777"`},
	 203  	{String, `"\x00"`},
	 204  	{String, `"\xff"`},
	 205  	{String, `"\u0000"`},
	 206  	{String, `"\ufA16"`},
	 207  	{String, `"\U00000000"`},
	 208  	{String, `"\U0000ffAB"`},
	 209  	{String, `"` + f100 + `"`},
	 210  
	 211  	{Comment, "// raw strings"},
	 212  	{RawString, "``"},
	 213  	{RawString, "`\\`"},
	 214  	{RawString, "`" + "\n\n/* foobar */\n\n" + "`"},
	 215  	{RawString, "`" + f100 + "`"},
	 216  
	 217  	{Comment, "// individual characters"},
	 218  	// NUL character is not allowed
	 219  	{'\x01', "\x01"},
	 220  	{' ' - 1, string(' ' - 1)},
	 221  	{'+', "+"},
	 222  	{'/', "/"},
	 223  	{'.', "."},
	 224  	{'~', "~"},
	 225  	{'(', "("},
	 226  }
	 227  
	 228  func makeSource(pattern string) *bytes.Buffer {
	 229  	var buf bytes.Buffer
	 230  	for _, k := range tokenList {
	 231  		fmt.Fprintf(&buf, pattern, k.text)
	 232  	}
	 233  	return &buf
	 234  }
	 235  
	 // checkTok verifies the result of the scanner's most recent Scan or Next call.
	 //
	 // got is the token that call returned and want the expected one; a mismatch
	 // aborts the test, since later expectations would be out of sync. Otherwise
	 // s.Line is compared with line and s.TokenText() with text; when the text
	 // matches, TokenText is called a second time to make sure repeated calls
	 // return the same value.
	 //
	 // checkTok is marked as a test helper so that failures are reported at the
	 // line of the caller rather than inside this function.
	 func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
	 	t.Helper()
	 	if got != want {
	 		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
	 	}
	 	if s.Line != line {
	 		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
	 	}
	 	stext := s.TokenText()
	 	if stext != text {
	 		t.Errorf("text = %q, want %q", stext, text)
	 		return
	 	}
	 	// check idempotency of TokenText() call
	 	if again := s.TokenText(); again != text {
	 		t.Errorf("text = %q, want %q (idempotency check)", again, text)
	 	}
	 }
	 254  
	 255  func checkTokErr(t *testing.T, s *Scanner, line int, want rune, text string) {
	 256  	prevCount := s.ErrorCount
	 257  	checkTok(t, s, line, s.Scan(), want, text)
	 258  	if s.ErrorCount != prevCount+1 {
	 259  		t.Fatalf("want error for %q", text)
	 260  	}
	 261  }
	 262  
	 // countNewlines returns the number of newline ('\n') characters in s.
	 func countNewlines(s string) int {
	 	return strings.Count(s, "\n")
	 }
	 272  
	 273  func testScan(t *testing.T, mode uint) {
	 274  	s := new(Scanner).Init(makeSource(" \t%s\n"))
	 275  	s.Mode = mode
	 276  	tok := s.Scan()
	 277  	line := 1
	 278  	for _, k := range tokenList {
	 279  		if mode&SkipComments == 0 || k.tok != Comment {
	 280  			checkTok(t, s, line, tok, k.tok, k.text)
	 281  			tok = s.Scan()
	 282  		}
	 283  		line += countNewlines(k.text) + 1 // each token is on a new line
	 284  	}
	 285  	checkTok(t, s, line, tok, EOF, "")
	 286  }
	 287  
	 288  func TestScan(t *testing.T) {
	 289  	testScan(t, GoTokens)
	 290  	testScan(t, GoTokens&^SkipComments)
	 291  }
	 292  
	 293  func TestInvalidExponent(t *testing.T) {
	 294  	const src = "1.5e 1.5E 1e+ 1e- 1.5z"
	 295  	s := new(Scanner).Init(strings.NewReader(src))
	 296  	s.Error = func(s *Scanner, msg string) {
	 297  		const want = "exponent has no digits"
	 298  		if msg != want {
	 299  			t.Errorf("%s: got error %q; want %q", s.TokenText(), msg, want)
	 300  		}
	 301  	}
	 302  	checkTokErr(t, s, 1, Float, "1.5e")
	 303  	checkTokErr(t, s, 1, Float, "1.5E")
	 304  	checkTokErr(t, s, 1, Float, "1e+")
	 305  	checkTokErr(t, s, 1, Float, "1e-")
	 306  	checkTok(t, s, 1, s.Scan(), Float, "1.5")
	 307  	checkTok(t, s, 1, s.Scan(), Ident, "z")
	 308  	checkTok(t, s, 1, s.Scan(), EOF, "")
	 309  	if s.ErrorCount != 4 {
	 310  		t.Errorf("%d errors, want 4", s.ErrorCount)
	 311  	}
	 312  }
	 313  
	 314  func TestPosition(t *testing.T) {
	 315  	src := makeSource("\t\t\t\t%s\n")
	 316  	s := new(Scanner).Init(src)
	 317  	s.Mode = GoTokens &^ SkipComments
	 318  	s.Scan()
	 319  	pos := Position{"", 4, 1, 5}
	 320  	for _, k := range tokenList {
	 321  		if s.Offset != pos.Offset {
	 322  			t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
	 323  		}
	 324  		if s.Line != pos.Line {
	 325  			t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
	 326  		}
	 327  		if s.Column != pos.Column {
	 328  			t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
	 329  		}
	 330  		pos.Offset += 4 + len(k.text) + 1		 // 4 tabs + token bytes + newline
	 331  		pos.Line += countNewlines(k.text) + 1 // each token is on a new line
	 332  		s.Scan()
	 333  	}
	 334  	// make sure there were no token-internal errors reported by scanner
	 335  	if s.ErrorCount != 0 {
	 336  		t.Errorf("%d errors", s.ErrorCount)
	 337  	}
	 338  }
	 339  
	 340  func TestScanZeroMode(t *testing.T) {
	 341  	src := makeSource("%s\n")
	 342  	str := src.String()
	 343  	s := new(Scanner).Init(src)
	 344  	s.Mode = 0			 // don't recognize any token classes
	 345  	s.Whitespace = 0 // don't skip any whitespace
	 346  	tok := s.Scan()
	 347  	for i, ch := range str {
	 348  		if tok != ch {
	 349  			t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
	 350  		}
	 351  		tok = s.Scan()
	 352  	}
	 353  	if tok != EOF {
	 354  		t.Fatalf("tok = %s, want EOF", TokenString(tok))
	 355  	}
	 356  	if s.ErrorCount != 0 {
	 357  		t.Errorf("%d errors", s.ErrorCount)
	 358  	}
	 359  }
	 360  
	 361  func testScanSelectedMode(t *testing.T, mode uint, class rune) {
	 362  	src := makeSource("%s\n")
	 363  	s := new(Scanner).Init(src)
	 364  	s.Mode = mode
	 365  	tok := s.Scan()
	 366  	for tok != EOF {
	 367  		if tok < 0 && tok != class {
	 368  			t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
	 369  		}
	 370  		tok = s.Scan()
	 371  	}
	 372  	if s.ErrorCount != 0 {
	 373  		t.Errorf("%d errors", s.ErrorCount)
	 374  	}
	 375  }
	 376  
	 377  func TestScanSelectedMask(t *testing.T) {
	 378  	testScanSelectedMode(t, 0, 0)
	 379  	testScanSelectedMode(t, ScanIdents, Ident)
	 380  	// Don't test ScanInts and ScanNumbers since some parts of
	 381  	// the floats in the source look like (invalid) octal ints
	 382  	// and ScanNumbers may return either Int or Float.
	 383  	testScanSelectedMode(t, ScanChars, Char)
	 384  	testScanSelectedMode(t, ScanStrings, String)
	 385  	testScanSelectedMode(t, SkipComments, 0)
	 386  	testScanSelectedMode(t, ScanComments, Comment)
	 387  }
	 388  
	 389  func TestScanCustomIdent(t *testing.T) {
	 390  	const src = "faab12345 a12b123 a12 3b"
	 391  	s := new(Scanner).Init(strings.NewReader(src))
	 392  	// ident = ( 'a' | 'b' ) { digit } .
	 393  	// digit = '0' .. '3' .
	 394  	// with a maximum length of 4
	 395  	s.IsIdentRune = func(ch rune, i int) bool {
	 396  		return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
	 397  	}
	 398  	checkTok(t, s, 1, s.Scan(), 'f', "f")
	 399  	checkTok(t, s, 1, s.Scan(), Ident, "a")
	 400  	checkTok(t, s, 1, s.Scan(), Ident, "a")
	 401  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
	 402  	checkTok(t, s, 1, s.Scan(), Int, "45")
	 403  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
	 404  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
	 405  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
	 406  	checkTok(t, s, 1, s.Scan(), Int, "3")
	 407  	checkTok(t, s, 1, s.Scan(), Ident, "b")
	 408  	checkTok(t, s, 1, s.Scan(), EOF, "")
	 409  }
	 410  
	 411  func TestScanNext(t *testing.T) {
	 412  	const BOM = '\uFEFF'
	 413  	BOMs := string(BOM)
	 414  	s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
	 415  	checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
	 416  	checkTok(t, s, 1, s.Scan(), Ident, "a")
	 417  	checkTok(t, s, 1, s.Scan(), '=', "=")
	 418  	checkTok(t, s, 0, s.Next(), '=', "")
	 419  	checkTok(t, s, 0, s.Next(), ' ', "")
	 420  	checkTok(t, s, 0, s.Next(), 'b', "")
	 421  	checkTok(t, s, 1, s.Scan(), Ident, "cd")
	 422  	checkTok(t, s, 1, s.Scan(), '{', "{")
	 423  	checkTok(t, s, 2, s.Scan(), Ident, "a")
	 424  	checkTok(t, s, 2, s.Scan(), '+', "+")
	 425  	checkTok(t, s, 0, s.Next(), '=', "")
	 426  	checkTok(t, s, 2, s.Scan(), Ident, "c")
	 427  	checkTok(t, s, 3, s.Scan(), '}', "}")
	 428  	checkTok(t, s, 3, s.Scan(), BOM, BOMs)
	 429  	checkTok(t, s, 3, s.Scan(), -1, "")
	 430  	if s.ErrorCount != 0 {
	 431  		t.Errorf("%d errors", s.ErrorCount)
	 432  	}
	 433  }
	 434  
	 // TestScanWhitespace declares every character from 1 up to (but excluding)
	 // ' ' as whitespace and checks that Scan skips all of them and returns the
	 // 'x' that follows.
	 func TestScanWhitespace(t *testing.T) {
	 	const orig = 'x'
	 	var (
	 		src bytes.Buffer
	 		ws  uint64 // one bit per whitespace character
	 	)
	 	// start at 1, NUL character is not allowed
	 	for ch := byte(1); ch < ' '; ch++ {
	 		src.WriteByte(ch)
	 		ws |= uint64(1) << ch
	 	}
	 	src.WriteByte(orig)

	 	s := new(Scanner).Init(&src)
	 	s.Mode = 0
	 	s.Whitespace = ws
	 	if tok := s.Scan(); tok != orig {
	 		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
	 	}
	 }
	 454  
	 // testError scans the first token of src and checks that
	 //   - the first error reported through the scanner's Error handler has the
	 //     message msg and is reported while s.Pos() formats as pos,
	 //   - Scan returns tok, and
	 //   - the handler was called and ErrorCount is non-zero.
	 // Errors after the first one are ignored.
	 func testError(t *testing.T, src, pos, msg string, tok rune) {
	 	s := new(Scanner).Init(strings.NewReader(src))
	 	calls := 0
	 	s.Error = func(s *Scanner, m string) {
	 		calls++
	 		if calls > 1 {
	 			// only look at first error
	 			return
	 		}
	 		if p := s.Pos().String(); p != pos {
	 			t.Errorf("pos = %q, want %q for %q", p, pos, src)
	 		}
	 		if m != msg {
	 			t.Errorf("msg = %q, want %q for %q", m, msg, src)
	 		}
	 	}

	 	if tk := s.Scan(); tk != tok {
	 		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
	 	}
	 	if calls == 0 {
	 		t.Errorf("error handler not called for %q", src)
	 	}
	 	if s.ErrorCount == 0 {
	 		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
	 	}
	 }
	 481  
	 482  func TestError(t *testing.T) {
	 483  	testError(t, "\x00", "<input>:1:1", "invalid character NUL", 0)
	 484  	testError(t, "\x80", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
	 485  	testError(t, "\xff", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
	 486  
	 487  	testError(t, "a\x00", "<input>:1:2", "invalid character NUL", Ident)
	 488  	testError(t, "ab\x80", "<input>:1:3", "invalid UTF-8 encoding", Ident)
	 489  	testError(t, "abc\xff", "<input>:1:4", "invalid UTF-8 encoding", Ident)
	 490  
	 491  	testError(t, `"a`+"\x00", "<input>:1:3", "invalid character NUL", String)
	 492  	testError(t, `"ab`+"\x80", "<input>:1:4", "invalid UTF-8 encoding", String)
	 493  	testError(t, `"abc`+"\xff", "<input>:1:5", "invalid UTF-8 encoding", String)
	 494  
	 495  	testError(t, "`a"+"\x00", "<input>:1:3", "invalid character NUL", RawString)
	 496  	testError(t, "`ab"+"\x80", "<input>:1:4", "invalid UTF-8 encoding", RawString)
	 497  	testError(t, "`abc"+"\xff", "<input>:1:5", "invalid UTF-8 encoding", RawString)
	 498  
	 499  	testError(t, `'\"'`, "<input>:1:3", "invalid char escape", Char)
	 500  	testError(t, `"\'"`, "<input>:1:3", "invalid char escape", String)
	 501  
	 502  	testError(t, `01238`, "<input>:1:6", "invalid digit '8' in octal literal", Int)
	 503  	testError(t, `01238123`, "<input>:1:9", "invalid digit '8' in octal literal", Int)
	 504  	testError(t, `0x`, "<input>:1:3", "hexadecimal literal has no digits", Int)
	 505  	testError(t, `0xg`, "<input>:1:3", "hexadecimal literal has no digits", Int)
	 506  	testError(t, `'aa'`, "<input>:1:4", "invalid char literal", Char)
	 507  	testError(t, `1.5e`, "<input>:1:5", "exponent has no digits", Float)
	 508  	testError(t, `1.5E`, "<input>:1:5", "exponent has no digits", Float)
	 509  	testError(t, `1.5e+`, "<input>:1:6", "exponent has no digits", Float)
	 510  	testError(t, `1.5e-`, "<input>:1:6", "exponent has no digits", Float)
	 511  
	 512  	testError(t, `'`, "<input>:1:2", "literal not terminated", Char)
	 513  	testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char)
	 514  	testError(t, `"abc`, "<input>:1:5", "literal not terminated", String)
	 515  	testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String)
	 516  	testError(t, "`abc\n", "<input>:2:1", "literal not terminated", RawString)
	 517  	testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF)
	 518  }
	 519  
	 // An errReader is a reader that never delivers any data: every Read call
	 // returns (0, io.ErrNoProgress), i.e. an error that is not io.EOF.
	 type errReader struct{}

	 // Read ignores its argument and always fails with io.ErrNoProgress.
	 func (errReader) Read([]byte) (int, error) {
	 	const none = 0 // no bytes are ever read
	 	return none, io.ErrNoProgress
	 }
	 526  
	 527  func TestIOError(t *testing.T) {
	 528  	s := new(Scanner).Init(errReader{})
	 529  	errorCalled := false
	 530  	s.Error = func(s *Scanner, msg string) {
	 531  		if !errorCalled {
	 532  			if want := io.ErrNoProgress.Error(); msg != want {
	 533  				t.Errorf("msg = %q, want %q", msg, want)
	 534  			}
	 535  			errorCalled = true
	 536  		}
	 537  	}
	 538  	tok := s.Scan()
	 539  	if tok != EOF {
	 540  		t.Errorf("tok = %s, want EOF", TokenString(tok))
	 541  	}
	 542  	if !errorCalled {
	 543  		t.Errorf("error handler not called")
	 544  	}
	 545  }
	 546  
	 // checkPos reports an error unless got and want agree in Offset, Line and
	 // Column. The Filename field is not compared.
	 //
	 // The error message carries no context of its own, so checkPos is marked as
	 // a test helper: a failure is then reported at the line of the caller.
	 func checkPos(t *testing.T, got, want Position) {
	 	t.Helper()
	 	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
	 		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
	 			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
	 	}
	 }
	 553  
	 554  func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
	 555  	if ch := s.Next(); ch != char {
	 556  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
	 557  	}
	 558  	want := Position{Offset: offset, Line: line, Column: column}
	 559  	checkPos(t, s.Pos(), want)
	 560  }
	 561  
	 562  func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
	 563  	want := Position{Offset: offset, Line: line, Column: column}
	 564  	checkPos(t, s.Pos(), want)
	 565  	if ch := s.Scan(); ch != char {
	 566  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
	 567  		if string(ch) != s.TokenText() {
	 568  			t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
	 569  		}
	 570  	}
	 571  	checkPos(t, s.Position, want)
	 572  }
	 573  
	 574  func TestPos(t *testing.T) {
	 575  	// corner case: empty source
	 576  	s := new(Scanner).Init(strings.NewReader(""))
	 577  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
	 578  	s.Peek() // peek doesn't affect the position
	 579  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
	 580  
	 581  	// corner case: source with only a newline
	 582  	s = new(Scanner).Init(strings.NewReader("\n"))
	 583  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
	 584  	checkNextPos(t, s, 1, 2, 1, '\n')
	 585  	// after EOF position doesn't change
	 586  	for i := 10; i > 0; i-- {
	 587  		checkScanPos(t, s, 1, 2, 1, EOF)
	 588  	}
	 589  	if s.ErrorCount != 0 {
	 590  		t.Errorf("%d errors", s.ErrorCount)
	 591  	}
	 592  
	 593  	// corner case: source with only a single character
	 594  	s = new(Scanner).Init(strings.NewReader("本"))
	 595  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
	 596  	checkNextPos(t, s, 3, 1, 2, '本')
	 597  	// after EOF position doesn't change
	 598  	for i := 10; i > 0; i-- {
	 599  		checkScanPos(t, s, 3, 1, 2, EOF)
	 600  	}
	 601  	if s.ErrorCount != 0 {
	 602  		t.Errorf("%d errors", s.ErrorCount)
	 603  	}
	 604  
	 605  	// positions after calling Next
	 606  	s = new(Scanner).Init(strings.NewReader("	foo६४	\n\n本語\n"))
	 607  	checkNextPos(t, s, 1, 1, 2, ' ')
	 608  	s.Peek() // peek doesn't affect the position
	 609  	checkNextPos(t, s, 2, 1, 3, ' ')
	 610  	checkNextPos(t, s, 3, 1, 4, 'f')
	 611  	checkNextPos(t, s, 4, 1, 5, 'o')
	 612  	checkNextPos(t, s, 5, 1, 6, 'o')
	 613  	checkNextPos(t, s, 8, 1, 7, '६')
	 614  	checkNextPos(t, s, 11, 1, 8, '४')
	 615  	checkNextPos(t, s, 12, 1, 9, ' ')
	 616  	checkNextPos(t, s, 13, 1, 10, ' ')
	 617  	checkNextPos(t, s, 14, 2, 1, '\n')
	 618  	checkNextPos(t, s, 15, 3, 1, '\n')
	 619  	checkNextPos(t, s, 18, 3, 2, '本')
	 620  	checkNextPos(t, s, 21, 3, 3, '語')
	 621  	checkNextPos(t, s, 22, 4, 1, '\n')
	 622  	// after EOF position doesn't change
	 623  	for i := 10; i > 0; i-- {
	 624  		checkScanPos(t, s, 22, 4, 1, EOF)
	 625  	}
	 626  	if s.ErrorCount != 0 {
	 627  		t.Errorf("%d errors", s.ErrorCount)
	 628  	}
	 629  
	 630  	// positions after calling Scan
	 631  	s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
	 632  	s.Mode = 0
	 633  	s.Whitespace = 0
	 634  	checkScanPos(t, s, 0, 1, 1, 'a')
	 635  	s.Peek() // peek doesn't affect the position
	 636  	checkScanPos(t, s, 1, 1, 2, 'b')
	 637  	checkScanPos(t, s, 2, 1, 3, 'c')
	 638  	checkScanPos(t, s, 3, 1, 4, '\n')
	 639  	checkScanPos(t, s, 4, 2, 1, '本')
	 640  	checkScanPos(t, s, 7, 2, 2, '語')
	 641  	checkScanPos(t, s, 10, 2, 3, '\n')
	 642  	checkScanPos(t, s, 11, 3, 1, '\n')
	 643  	checkScanPos(t, s, 12, 4, 1, 'x')
	 644  	// after EOF position doesn't change
	 645  	for i := 10; i > 0; i-- {
	 646  		checkScanPos(t, s, 13, 4, 2, EOF)
	 647  	}
	 648  	if s.ErrorCount != 0 {
	 649  		t.Errorf("%d errors", s.ErrorCount)
	 650  	}
	 651  }
	 652  
	 // A countReader is an always-empty reader that counts how often it is read
	 // from; its integer value is the number of Read calls made so far.
	 type countReader int

	 // Read records the call and reports end of input without reading anything.
	 func (r *countReader) Read([]byte) (int, error) {
	 	*r += 1
	 	return 0, io.EOF
	 }
	 659  
	 660  func TestNextEOFHandling(t *testing.T) {
	 661  	var r countReader
	 662  
	 663  	// corner case: empty source
	 664  	s := new(Scanner).Init(&r)
	 665  
	 666  	tok := s.Next()
	 667  	if tok != EOF {
	 668  		t.Error("1) EOF not reported")
	 669  	}
	 670  
	 671  	tok = s.Peek()
	 672  	if tok != EOF {
	 673  		t.Error("2) EOF not reported")
	 674  	}
	 675  
	 676  	if r != 1 {
	 677  		t.Errorf("scanner called Read %d times, not once", r)
	 678  	}
	 679  }
	 680  
	 681  func TestScanEOFHandling(t *testing.T) {
	 682  	var r countReader
	 683  
	 684  	// corner case: empty source
	 685  	s := new(Scanner).Init(&r)
	 686  
	 687  	tok := s.Scan()
	 688  	if tok != EOF {
	 689  		t.Error("1) EOF not reported")
	 690  	}
	 691  
	 692  	tok = s.Peek()
	 693  	if tok != EOF {
	 694  		t.Error("2) EOF not reported")
	 695  	}
	 696  
	 697  	if r != 1 {
	 698  		t.Errorf("scanner called Read %d times, not once", r)
	 699  	}
	 700  }
	 701  
	 // TestIssue29723 checks that TokenText can be called from inside the Error
	 // handler: scanning the unterminated string in `x "` must not panic, and the
	 // token text seen by the handler must be the opening quote.
	 func TestIssue29723(t *testing.T) {
	 	s := new(Scanner).Init(strings.NewReader(`x "`))
	 	s.Error = func(s *Scanner, _ string) {
	 		const want = `"`
	 		// this call shouldn't panic
	 		if got := s.TokenText(); got != want {
	 			t.Errorf("got %q; want %q", got, want)
	 		}
	 	}
	 	// drain the source; only the Error handler is of interest
	 	for s.Scan() != EOF {
	 	}
	 }
	 714  
	 // TestNumbers scans a table of number literals. For every case it checks the
	 // class of the first token, the first error reported while scanning that
	 // token ("" if none is expected), and the text of every token the source is
	 // expected to split into; the scanner must be at EOF afterwards.
	 func TestNumbers(t *testing.T) {
	 	type numberTest struct {
	 		tok              rune   // expected class of the first token
	 		src, tokens, err string // source, space-separated token texts, first error
	 	}
	 	tests := []numberTest{
	 		// binaries
	 		{Int, "0b0", "0b0", ""},
	 		{Int, "0b1010", "0b1010", ""},
	 		{Int, "0B1110", "0B1110", ""},

	 		{Int, "0b", "0b", "binary literal has no digits"},
	 		{Int, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
	 		{Int, "0b01a0", "0b01 a0", ""}, // only accept 0-9

	 		// binary floats (invalid)
	 		{Float, "0b.", "0b.", "invalid radix point in binary literal"},
	 		{Float, "0b.1", "0b.1", "invalid radix point in binary literal"},
	 		{Float, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
	 		{Float, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
	 		{Float, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},

	 		// octals
	 		{Int, "0o0", "0o0", ""},
	 		{Int, "0o1234", "0o1234", ""},
	 		{Int, "0O1234", "0O1234", ""},

	 		{Int, "0o", "0o", "octal literal has no digits"},
	 		{Int, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
	 		{Int, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
	 		{Int, "0o12a3", "0o12 a3", ""}, // only accept 0-9

	 		// octal floats (invalid)
	 		{Float, "0o.", "0o.", "invalid radix point in octal literal"},
	 		{Float, "0o.2", "0o.2", "invalid radix point in octal literal"},
	 		{Float, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
	 		{Float, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
	 		{Float, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},

	 		// 0-octals
	 		{Int, "0", "0", ""},
	 		{Int, "0123", "0123", ""},

	 		{Int, "08123", "08123", "invalid digit '8' in octal literal"},
	 		{Int, "01293", "01293", "invalid digit '9' in octal literal"},
	 		{Int, "0F.", "0 F .", ""}, // only accept 0-9
	 		{Int, "0123F.", "0123 F .", ""},
	 		{Int, "0123456x", "0123456 x", ""},

	 		// decimals
	 		{Int, "1", "1", ""},
	 		{Int, "1234", "1234", ""},

	 		{Int, "1f", "1 f", ""}, // only accept 0-9

	 		// decimal floats
	 		{Float, "0.", "0.", ""},
	 		{Float, "123.", "123.", ""},
	 		{Float, "0123.", "0123.", ""},

	 		{Float, ".0", ".0", ""},
	 		{Float, ".123", ".123", ""},
	 		{Float, ".0123", ".0123", ""},

	 		{Float, "0.0", "0.0", ""},
	 		{Float, "123.123", "123.123", ""},
	 		{Float, "0123.0123", "0123.0123", ""},

	 		{Float, "0e0", "0e0", ""},
	 		{Float, "123e+0", "123e+0", ""},
	 		{Float, "0123E-1", "0123E-1", ""},

	 		{Float, "0.e+1", "0.e+1", ""},
	 		{Float, "123.E-10", "123.E-10", ""},
	 		{Float, "0123.e123", "0123.e123", ""},

	 		{Float, ".0e-1", ".0e-1", ""},
	 		{Float, ".123E+10", ".123E+10", ""},
	 		{Float, ".0123E123", ".0123E123", ""},

	 		{Float, "0.0e1", "0.0e1", ""},
	 		{Float, "123.123E-10", "123.123E-10", ""},
	 		{Float, "0123.0123e+456", "0123.0123e+456", ""},

	 		{Float, "0e", "0e", "exponent has no digits"},
	 		{Float, "0E+", "0E+", "exponent has no digits"},
	 		{Float, "1e+f", "1e+ f", "exponent has no digits"},
	 		{Float, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
	 		{Float, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},

	 		// hexadecimals
	 		{Int, "0x0", "0x0", ""},
	 		{Int, "0x1234", "0x1234", ""},
	 		{Int, "0xcafef00d", "0xcafef00d", ""},
	 		{Int, "0XCAFEF00D", "0XCAFEF00D", ""},

	 		{Int, "0x", "0x", "hexadecimal literal has no digits"},
	 		{Int, "0x1g", "0x1 g", ""},

	 		// hexadecimal floats
	 		{Float, "0x0p0", "0x0p0", ""},
	 		{Float, "0x12efp-123", "0x12efp-123", ""},
	 		{Float, "0xABCD.p+0", "0xABCD.p+0", ""},
	 		{Float, "0x.0189P-0", "0x.0189P-0", ""},
	 		{Float, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},

	 		{Float, "0x.", "0x.", "hexadecimal literal has no digits"},
	 		{Float, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
	 		{Float, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
	 		{Float, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
	 		{Float, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
	 		{Float, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
	 		{Float, "0x0p", "0x0p", "exponent has no digits"},
	 		{Float, "0xeP-", "0xeP-", "exponent has no digits"},
	 		{Float, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
	 		{Float, "0x1.2p1a", "0x1.2p1 a", ""},

	 		// separators
	 		{Int, "0b_1000_0001", "0b_1000_0001", ""},
	 		{Int, "0o_600", "0o_600", ""},
	 		{Int, "0_466", "0_466", ""},
	 		{Int, "1_000", "1_000", ""},
	 		{Float, "1_000.000_1", "1_000.000_1", ""},
	 		{Int, "0x_f00d", "0x_f00d", ""},
	 		{Float, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},

	 		{Int, "0b__1000", "0b__1000", "'_' must separate successive digits"},
	 		{Int, "0o60___0", "0o60___0", "'_' must separate successive digits"},
	 		{Int, "0466_", "0466_", "'_' must separate successive digits"},
	 		{Float, "1_.", "1_.", "'_' must separate successive digits"},
	 		{Float, "0._1", "0._1", "'_' must separate successive digits"},
	 		{Float, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
	 		{Int, "0x___0", "0x___0", "'_' must separate successive digits"},
	 		{Float, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
	 	}

	 	for _, tc := range tests {
	 		s := new(Scanner).Init(strings.NewReader(tc.src))
	 		// firstErr holds the first error reported since it was last cleared
	 		var firstErr string
	 		s.Error = func(_ *Scanner, msg string) {
	 			if firstErr == "" {
	 				firstErr = msg
	 			}
	 		}

	 		for i, want := range strings.Split(tc.tokens, " ") {
	 			firstErr = ""
	 			tok := s.Scan()
	 			lit := s.TokenText()
	 			// token class and error are only specified for the first token
	 			if i == 0 {
	 				if tok != tc.tok {
	 					t.Errorf("%q: got token %s; want %s", tc.src, TokenString(tok), TokenString(tc.tok))
	 				}
	 				if firstErr != tc.err {
	 					t.Errorf("%q: got error %q; want %q", tc.src, firstErr, tc.err)
	 				}
	 			}
	 			if lit != want {
	 				t.Errorf("%q: got literal %q (%s); want %s", tc.src, lit, TokenString(tok), want)
	 			}
	 		}

	 		// make sure we read all
	 		if tok := s.Scan(); tok != EOF {
	 			t.Errorf("%q: got %s; want EOF", tc.src, TokenString(tok))
	 		}
	 	}
	 }
	 880  
	 881  func TestIssue30320(t *testing.T) {
	 882  	for _, test := range []struct {
	 883  		in, want string
	 884  		mode		 uint
	 885  	}{
	 886  		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
	 887  		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
	 888  		{"xxx1e0yyy", "1 0", ScanInts},
	 889  		{"1_2", "1_2", ScanInts},
	 890  		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
	 891  		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
	 892  	} {
	 893  		got := extractInts(test.in, test.mode)
	 894  		if got != test.want {
	 895  			t.Errorf("%q: got %q; want %q", test.in, got, test.want)
	 896  		}
	 897  	}
	 898  }
	 899  
	 // extractInts scans t with the given scanner mode and returns the texts of
	 // all Int and Float tokens found, in order of appearance and separated by
	 // single spaces. All other tokens are ignored.
	 func extractInts(t string, mode uint) (res string) {
	 	var s Scanner
	 	s.Init(strings.NewReader(t))
	 	s.Mode = mode

	 	var numbers []string
	 	for tok := s.Scan(); tok != EOF; tok = s.Scan() {
	 		if tok == Int || tok == Float {
	 			numbers = append(numbers, s.TokenText())
	 		}
	 	}
	 	return strings.Join(numbers, " ")
	 }
	 916  

View as plain text