scan_test.go

Documentation: bufio

		 1  // Copyright 2013 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package bufio_test
		 6  
		 7  import (
		 8  	. "bufio"
		 9  	"bytes"
		10  	"errors"
		11  	"io"
		12  	"strings"
		13  	"testing"
		14  	"unicode"
		15  	"unicode/utf8"
		16  )
		17  
		18  const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
		19  
		20  // Test white space table matches the Unicode definition.
		21  func TestSpace(t *testing.T) {
		22  	for r := rune(0); r <= utf8.MaxRune; r++ {
		23  		if IsSpace(r) != unicode.IsSpace(r) {
		24  			t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
		25  		}
		26  	}
		27  }
		28  
		29  var scanTests = []string{
		30  	"",
		31  	"a",
		32  	"¼",
		33  	"☹",
		34  	"\x81",	 // UTF-8 error
		35  	"\uFFFD", // correctly encoded RuneError
		36  	"abcdefgh",
		37  	"abc def\n\t\tgh		",
		38  	"abc¼☹\x81\uFFFD日本語\x82abc",
		39  }
		40  
		41  func TestScanByte(t *testing.T) {
		42  	for n, test := range scanTests {
		43  		buf := strings.NewReader(test)
		44  		s := NewScanner(buf)
		45  		s.Split(ScanBytes)
		46  		var i int
		47  		for i = 0; s.Scan(); i++ {
		48  			if b := s.Bytes(); len(b) != 1 || b[0] != test[i] {
		49  				t.Errorf("#%d: %d: expected %q got %q", n, i, test, b)
		50  			}
		51  		}
		52  		if i != len(test) {
		53  			t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i)
		54  		}
		55  		err := s.Err()
		56  		if err != nil {
		57  			t.Errorf("#%d: %v", n, err)
		58  		}
		59  	}
		60  }
		61  
		62  // Test that the rune splitter returns same sequence of runes (not bytes) as for range string.
		63  func TestScanRune(t *testing.T) {
		64  	for n, test := range scanTests {
		65  		buf := strings.NewReader(test)
		66  		s := NewScanner(buf)
		67  		s.Split(ScanRunes)
		68  		var i, runeCount int
		69  		var expect rune
		70  		// Use a string range loop to validate the sequence of runes.
		71  		for i, expect = range string(test) {
		72  			if !s.Scan() {
		73  				break
		74  			}
		75  			runeCount++
		76  			got, _ := utf8.DecodeRune(s.Bytes())
		77  			if got != expect {
		78  				t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got)
		79  			}
		80  		}
		81  		if s.Scan() {
		82  			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
		83  		}
		84  		testRuneCount := utf8.RuneCountInString(test)
		85  		if runeCount != testRuneCount {
		86  			t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount)
		87  		}
		88  		err := s.Err()
		89  		if err != nil {
		90  			t.Errorf("#%d: %v", n, err)
		91  		}
		92  	}
		93  }
		94  
		95  var wordScanTests = []string{
		96  	"",
		97  	" ",
		98  	"\n",
		99  	"a",
	 100  	" a ",
	 101  	"abc def",
	 102  	" abc def ",
	 103  	" abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
	 104  }
	 105  
	 106  // Test that the word splitter returns the same data as strings.Fields.
	 107  func TestScanWords(t *testing.T) {
	 108  	for n, test := range wordScanTests {
	 109  		buf := strings.NewReader(test)
	 110  		s := NewScanner(buf)
	 111  		s.Split(ScanWords)
	 112  		words := strings.Fields(test)
	 113  		var wordCount int
	 114  		for wordCount = 0; wordCount < len(words); wordCount++ {
	 115  			if !s.Scan() {
	 116  				break
	 117  			}
	 118  			got := s.Text()
	 119  			if got != words[wordCount] {
	 120  				t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got)
	 121  			}
	 122  		}
	 123  		if s.Scan() {
	 124  			t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
	 125  		}
	 126  		if wordCount != len(words) {
	 127  			t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount)
	 128  		}
	 129  		err := s.Err()
	 130  		if err != nil {
	 131  			t.Errorf("#%d: %v", n, err)
	 132  		}
	 133  	}
	 134  }
	 135  
	 136  // slowReader is a reader that returns only a few bytes at a time, to test the incremental
	 137  // reads in Scanner.Scan.
	 138  type slowReader struct {
	 139  	max int
	 140  	buf io.Reader
	 141  }
	 142  
	 143  func (sr *slowReader) Read(p []byte) (n int, err error) {
	 144  	if len(p) > sr.max {
	 145  		p = p[0:sr.max]
	 146  	}
	 147  	return sr.buf.Read(p)
	 148  }
	 149  
	 150  // genLine writes to buf a predictable but non-trivial line of text of length
	 151  // n, including the terminal newline and an occasional carriage return.
	 152  // If addNewline is false, the \r and \n are not emitted.
	 153  func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
	 154  	buf.Reset()
	 155  	doCR := lineNum%5 == 0
	 156  	if doCR {
	 157  		n--
	 158  	}
	 159  	for i := 0; i < n-1; i++ { // Stop early for \n.
	 160  		c := 'a' + byte(lineNum+i)
	 161  		if c == '\n' || c == '\r' { // Don't confuse us.
	 162  			c = 'N'
	 163  		}
	 164  		buf.WriteByte(c)
	 165  	}
	 166  	if addNewline {
	 167  		if doCR {
	 168  			buf.WriteByte('\r')
	 169  		}
	 170  		buf.WriteByte('\n')
	 171  	}
	 172  }
	 173  
	 174  // Test the line splitter, including some carriage returns but no long lines.
	 175  func TestScanLongLines(t *testing.T) {
	 176  	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
	 177  	tmp := new(bytes.Buffer)
	 178  	buf := new(bytes.Buffer)
	 179  	lineNum := 0
	 180  	j := 0
	 181  	for i := 0; i < 2*smallMaxTokenSize; i++ {
	 182  		genLine(tmp, lineNum, j, true)
	 183  		if j < smallMaxTokenSize {
	 184  			j++
	 185  		} else {
	 186  			j--
	 187  		}
	 188  		buf.Write(tmp.Bytes())
	 189  		lineNum++
	 190  	}
	 191  	s := NewScanner(&slowReader{1, buf})
	 192  	s.Split(ScanLines)
	 193  	s.MaxTokenSize(smallMaxTokenSize)
	 194  	j = 0
	 195  	for lineNum := 0; s.Scan(); lineNum++ {
	 196  		genLine(tmp, lineNum, j, false)
	 197  		if j < smallMaxTokenSize {
	 198  			j++
	 199  		} else {
	 200  			j--
	 201  		}
	 202  		line := tmp.String() // We use the string-valued token here, for variety.
	 203  		if s.Text() != line {
	 204  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line)
	 205  		}
	 206  	}
	 207  	err := s.Err()
	 208  	if err != nil {
	 209  		t.Fatal(err)
	 210  	}
	 211  }
	 212  
	 213  // Test that the line splitter errors out on a long line.
	 214  func TestScanLineTooLong(t *testing.T) {
	 215  	const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
	 216  	// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
	 217  	tmp := new(bytes.Buffer)
	 218  	buf := new(bytes.Buffer)
	 219  	lineNum := 0
	 220  	j := 0
	 221  	for i := 0; i < 2*smallMaxTokenSize; i++ {
	 222  		genLine(tmp, lineNum, j, true)
	 223  		j++
	 224  		buf.Write(tmp.Bytes())
	 225  		lineNum++
	 226  	}
	 227  	s := NewScanner(&slowReader{3, buf})
	 228  	s.Split(ScanLines)
	 229  	s.MaxTokenSize(smallMaxTokenSize)
	 230  	j = 0
	 231  	for lineNum := 0; s.Scan(); lineNum++ {
	 232  		genLine(tmp, lineNum, j, false)
	 233  		if j < smallMaxTokenSize {
	 234  			j++
	 235  		} else {
	 236  			j--
	 237  		}
	 238  		line := tmp.Bytes()
	 239  		if !bytes.Equal(s.Bytes(), line) {
	 240  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
	 241  		}
	 242  	}
	 243  	err := s.Err()
	 244  	if err != ErrTooLong {
	 245  		t.Fatalf("expected ErrTooLong; got %s", err)
	 246  	}
	 247  }
	 248  
	 249  // Test that the line splitter handles a final line without a newline.
	 250  func testNoNewline(text string, lines []string, t *testing.T) {
	 251  	buf := strings.NewReader(text)
	 252  	s := NewScanner(&slowReader{7, buf})
	 253  	s.Split(ScanLines)
	 254  	for lineNum := 0; s.Scan(); lineNum++ {
	 255  		line := lines[lineNum]
	 256  		if s.Text() != line {
	 257  			t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
	 258  		}
	 259  	}
	 260  	err := s.Err()
	 261  	if err != nil {
	 262  		t.Fatal(err)
	 263  	}
	 264  }
	 265  
	 266  // Test that the line splitter handles a final line without a newline.
	 267  func TestScanLineNoNewline(t *testing.T) {
	 268  	const text = "abcdefghijklmn\nopqrstuvwxyz"
	 269  	lines := []string{
	 270  		"abcdefghijklmn",
	 271  		"opqrstuvwxyz",
	 272  	}
	 273  	testNoNewline(text, lines, t)
	 274  }
	 275  
	 276  // Test that the line splitter handles a final line with a carriage return but no newline.
	 277  func TestScanLineReturnButNoNewline(t *testing.T) {
	 278  	const text = "abcdefghijklmn\nopqrstuvwxyz\r"
	 279  	lines := []string{
	 280  		"abcdefghijklmn",
	 281  		"opqrstuvwxyz",
	 282  	}
	 283  	testNoNewline(text, lines, t)
	 284  }
	 285  
	 286  // Test that the line splitter handles a final empty line.
	 287  func TestScanLineEmptyFinalLine(t *testing.T) {
	 288  	const text = "abcdefghijklmn\nopqrstuvwxyz\n\n"
	 289  	lines := []string{
	 290  		"abcdefghijklmn",
	 291  		"opqrstuvwxyz",
	 292  		"",
	 293  	}
	 294  	testNoNewline(text, lines, t)
	 295  }
	 296  
	 297  // Test that the line splitter handles a final empty line with a carriage return but no newline.
	 298  func TestScanLineEmptyFinalLineWithCR(t *testing.T) {
	 299  	const text = "abcdefghijklmn\nopqrstuvwxyz\n\r"
	 300  	lines := []string{
	 301  		"abcdefghijklmn",
	 302  		"opqrstuvwxyz",
	 303  		"",
	 304  	}
	 305  	testNoNewline(text, lines, t)
	 306  }
	 307  
	 308  var testError = errors.New("testError")
	 309  
	 310  // Test the correct error is returned when the split function errors out.
	 311  func TestSplitError(t *testing.T) {
	 312  	// Create a split function that delivers a little data, then a predictable error.
	 313  	numSplits := 0
	 314  	const okCount = 7
	 315  	errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
	 316  		if atEOF {
	 317  			panic("didn't get enough data")
	 318  		}
	 319  		if numSplits >= okCount {
	 320  			return 0, nil, testError
	 321  		}
	 322  		numSplits++
	 323  		return 1, data[0:1], nil
	 324  	}
	 325  	// Read the data.
	 326  	const text = "abcdefghijklmnopqrstuvwxyz"
	 327  	buf := strings.NewReader(text)
	 328  	s := NewScanner(&slowReader{1, buf})
	 329  	s.Split(errorSplit)
	 330  	var i int
	 331  	for i = 0; s.Scan(); i++ {
	 332  		if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
	 333  			t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
	 334  		}
	 335  	}
	 336  	// Check correct termination location and error.
	 337  	if i != okCount {
	 338  		t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
	 339  	}
	 340  	err := s.Err()
	 341  	if err != testError {
	 342  		t.Fatalf("expected %q got %v", testError, err)
	 343  	}
	 344  }
	 345  
	 346  // Test that an EOF is overridden by a user-generated scan error.
	 347  func TestErrAtEOF(t *testing.T) {
	 348  	s := NewScanner(strings.NewReader("1 2 33"))
	 349  	// This splitter will fail on last entry, after s.err==EOF.
	 350  	split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
	 351  		advance, token, err = ScanWords(data, atEOF)
	 352  		if len(token) > 1 {
	 353  			if s.ErrOrEOF() != io.EOF {
	 354  				t.Fatal("not testing EOF")
	 355  			}
	 356  			err = testError
	 357  		}
	 358  		return
	 359  	}
	 360  	s.Split(split)
	 361  	for s.Scan() {
	 362  	}
	 363  	if s.Err() != testError {
	 364  		t.Fatal("wrong error:", s.Err())
	 365  	}
	 366  }
	 367  
	 368  // Test for issue 5268.
	 369  type alwaysError struct{}
	 370  
	 371  func (alwaysError) Read(p []byte) (int, error) {
	 372  	return 0, io.ErrUnexpectedEOF
	 373  }
	 374  
	 375  func TestNonEOFWithEmptyRead(t *testing.T) {
	 376  	scanner := NewScanner(alwaysError{})
	 377  	for scanner.Scan() {
	 378  		t.Fatal("read should fail")
	 379  	}
	 380  	err := scanner.Err()
	 381  	if err != io.ErrUnexpectedEOF {
	 382  		t.Errorf("unexpected error: %v", err)
	 383  	}
	 384  }
	 385  
	 386  // Test that Scan finishes if we have endless empty reads.
	 387  type endlessZeros struct{}
	 388  
	 389  func (endlessZeros) Read(p []byte) (int, error) {
	 390  	return 0, nil
	 391  }
	 392  
	 393  func TestBadReader(t *testing.T) {
	 394  	scanner := NewScanner(endlessZeros{})
	 395  	for scanner.Scan() {
	 396  		t.Fatal("read should fail")
	 397  	}
	 398  	err := scanner.Err()
	 399  	if err != io.ErrNoProgress {
	 400  		t.Errorf("unexpected error: %v", err)
	 401  	}
	 402  }
	 403  
	 404  func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
	 405  	const word = "ipsum"
	 406  	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
	 407  	scanner := NewScanner(strings.NewReader(s))
	 408  	scanner.MaxTokenSize(smallMaxTokenSize)
	 409  	scanner.Split(ScanWords)
	 410  	if !scanner.Scan() {
	 411  		t.Fatalf("scan failed: %v", scanner.Err())
	 412  	}
	 413  	if token := scanner.Text(); token != word {
	 414  		t.Fatalf("unexpected token: %v", token)
	 415  	}
	 416  }
	 417  
	 418  // Test that empty tokens, including at end of line or end of file, are found by the scanner.
	 419  // Issue 8672: Could miss final empty token.
	 420  
	 421  func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	 422  	for i := 0; i < len(data); i++ {
	 423  		if data[i] == ',' {
	 424  			return i + 1, data[:i], nil
	 425  		}
	 426  	}
	 427  	return 0, data, ErrFinalToken
	 428  }
	 429  
	 430  func testEmptyTokens(t *testing.T, text string, values []string) {
	 431  	s := NewScanner(strings.NewReader(text))
	 432  	s.Split(commaSplit)
	 433  	var i int
	 434  	for i = 0; s.Scan(); i++ {
	 435  		if i >= len(values) {
	 436  			t.Fatalf("got %d fields, expected %d", i+1, len(values))
	 437  		}
	 438  		if s.Text() != values[i] {
	 439  			t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
	 440  		}
	 441  	}
	 442  	if i != len(values) {
	 443  		t.Fatalf("got %d fields, expected %d", i, len(values))
	 444  	}
	 445  	if err := s.Err(); err != nil {
	 446  		t.Fatal(err)
	 447  	}
	 448  }
	 449  
	 450  func TestEmptyTokens(t *testing.T) {
	 451  	testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
	 452  }
	 453  
	 454  func TestWithNoEmptyTokens(t *testing.T) {
	 455  	testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
	 456  }
	 457  
	 458  func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	 459  	if len(data) > 0 {
	 460  		return 1, data[:1], nil
	 461  	}
	 462  	return 0, data, nil
	 463  }
	 464  
	 465  func TestDontLoopForever(t *testing.T) {
	 466  	s := NewScanner(strings.NewReader("abc"))
	 467  	s.Split(loopAtEOFSplit)
	 468  	// Expect a panic
	 469  	defer func() {
	 470  		err := recover()
	 471  		if err == nil {
	 472  			t.Fatal("should have panicked")
	 473  		}
	 474  		if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
	 475  			panic(err)
	 476  		}
	 477  	}()
	 478  	for count := 0; s.Scan(); count++ {
	 479  		if count > 1000 {
	 480  			t.Fatal("looping")
	 481  		}
	 482  	}
	 483  	if s.Err() != nil {
	 484  		t.Fatal("after scan:", s.Err())
	 485  	}
	 486  }
	 487  
	 488  func TestBlankLines(t *testing.T) {
	 489  	s := NewScanner(strings.NewReader(strings.Repeat("\n", 1000)))
	 490  	for count := 0; s.Scan(); count++ {
	 491  		if count > 2000 {
	 492  			t.Fatal("looping")
	 493  		}
	 494  	}
	 495  	if s.Err() != nil {
	 496  		t.Fatal("after scan:", s.Err())
	 497  	}
	 498  }
	 499  
	 500  type countdown int
	 501  
	 502  func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	 503  	if *c > 0 {
	 504  		*c--
	 505  		return 1, data[:1], nil
	 506  	}
	 507  	return 0, nil, nil
	 508  }
	 509  
	 510  // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
	 511  func TestEmptyLinesOK(t *testing.T) {
	 512  	c := countdown(10000)
	 513  	s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
	 514  	s.Split(c.split)
	 515  	for s.Scan() {
	 516  	}
	 517  	if s.Err() != nil {
	 518  		t.Fatal("after scan:", s.Err())
	 519  	}
	 520  	if c != 0 {
	 521  		t.Fatalf("stopped with %d left to process", c)
	 522  	}
	 523  }
	 524  
	 525  // Make sure we can read a huge token if a big enough buffer is provided.
	 526  func TestHugeBuffer(t *testing.T) {
	 527  	text := strings.Repeat("x", 2*MaxScanTokenSize)
	 528  	s := NewScanner(strings.NewReader(text + "\n"))
	 529  	s.Buffer(make([]byte, 100), 3*MaxScanTokenSize)
	 530  	for s.Scan() {
	 531  		token := s.Text()
	 532  		if token != text {
	 533  			t.Errorf("scan got incorrect token of length %d", len(token))
	 534  		}
	 535  	}
	 536  	if s.Err() != nil {
	 537  		t.Fatal("after scan:", s.Err())
	 538  	}
	 539  }
	 540  
	 541  // negativeEOFReader returns an invalid -1 at the end, as though it
	 542  // were wrapping the read system call.
	 543  type negativeEOFReader int
	 544  
	 545  func (r *negativeEOFReader) Read(p []byte) (int, error) {
	 546  	if *r > 0 {
	 547  		c := int(*r)
	 548  		if c > len(p) {
	 549  			c = len(p)
	 550  		}
	 551  		for i := 0; i < c; i++ {
	 552  			p[i] = 'a'
	 553  		}
	 554  		p[c-1] = '\n'
	 555  		*r -= negativeEOFReader(c)
	 556  		return c, nil
	 557  	}
	 558  	return -1, io.EOF
	 559  }
	 560  
	 561  // Test that the scanner doesn't panic and returns ErrBadReadCount
	 562  // on a reader that returns a negative count of bytes read (issue 38053).
	 563  func TestNegativeEOFReader(t *testing.T) {
	 564  	r := negativeEOFReader(10)
	 565  	scanner := NewScanner(&r)
	 566  	c := 0
	 567  	for scanner.Scan() {
	 568  		c++
	 569  		if c > 1 {
	 570  			t.Error("read too many lines")
	 571  			break
	 572  		}
	 573  	}
	 574  	if got, want := scanner.Err(), ErrBadReadCount; got != want {
	 575  		t.Errorf("scanner.Err: got %v, want %v", got, want)
	 576  	}
	 577  }
	 578  
	 579  // largeReader returns an invalid count that is larger than the number
	 580  // of bytes requested.
	 581  type largeReader struct{}
	 582  
	 583  func (largeReader) Read(p []byte) (int, error) {
	 584  	return len(p) + 1, nil
	 585  }
	 586  
	 587  // Test that the scanner doesn't panic and returns ErrBadReadCount
	 588  // on a reader that returns an impossibly large count of bytes read (issue 38053).
	 589  func TestLargeReader(t *testing.T) {
	 590  	scanner := NewScanner(largeReader{})
	 591  	for scanner.Scan() {
	 592  	}
	 593  	if got, want := scanner.Err(), ErrBadReadCount; got != want {
	 594  		t.Errorf("scanner.Err: got %v, want %v", got, want)
	 595  	}
	 596  }
	 597
View as plain text