...

Source file src/encoding/csv/reader_test.go

Documentation: encoding/csv

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package csv
		 6  
		 7  import (
		 8  	"errors"
		 9  	"fmt"
		10  	"io"
		11  	"reflect"
		12  	"strings"
		13  	"testing"
		14  	"unicode/utf8"
		15  )
		16  
		17  type readTest struct {
		18  	Name			string
		19  	Input		 string
		20  	Output		[][]string
		21  	Positions [][][2]int
		22  	Errors		[]error
		23  
		24  	// These fields are copied into the Reader
		25  	Comma							rune
		26  	Comment						rune
		27  	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
		28  	FieldsPerRecord		int
		29  	LazyQuotes				 bool
		30  	TrimLeadingSpace	 bool
		31  	ReuseRecord				bool
		32  }
		33  
// readTests is the table of cases run by TestRead.
//
// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
// the start of a field, a record boundary and the position of an error respectively.
// They are removed before parsing and are used to verify the position
// information reported by FieldPos.
var readTests = []readTest{{
	Name:   "Simple",
	Input:  "§a,§b,§c\n",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "CRLF",
	Input:  "§a,§b\r\n¶§c,§d\r\n",
	Output: [][]string{{"a", "b"}, {"c", "d"}},
}, {
	Name:   "BareCR",
	Input:  "§a,§b\rc,§d\r\n",
	Output: [][]string{{"a", "b\rc", "d"}},
}, {
	Name: "RFC4180test",
	Input: `§#field1,§field2,§field3
¶§"aaa",§"bb
b",§"ccc"
¶§"a,a",§"b""bb",§"ccc"
¶§zzz,§yyy,§xxx
`,
	Output: [][]string{
		{"#field1", "field2", "field3"},
		{"aaa", "bb\nb", "ccc"},
		{"a,a", `b"bb`, "ccc"},
		{"zzz", "yyy", "xxx"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:   "NoEOLTest",
	Input:  "§a,§b,§c",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "Semicolon",
	Input:  "§a;§b;§c\n",
	Output: [][]string{{"a", "b", "c"}},
	Comma:  ';',
}, {
	Name: "MultiLine",
	Input: `§"two
line",§"one line",§"three
line
field"`,
	Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
}, {
	Name:  "BlankLine",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
}, {
	Name:  "BlankLineFieldCount",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:             "TrimSpace",
	Input:            " §a,	§b,	 §c\n",
	Output:           [][]string{{"a", "b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "LeadingSpace",
	Input:  "§ a,§	b,§	 c\n",
	Output: [][]string{{" a", "	b", "	 c"}},
}, {
	Name:    "Comment",
	Input:   "#1,2,3\n§a,§b,§c\n#comment",
	Output:  [][]string{{"a", "b", "c"}},
	Comment: '#',
}, {
	Name:   "NoComment",
	Input:  "§#1,§2,§3\n¶§a,§b,§c",
	Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
}, {
	Name:       "LazyQuotes",
	Input:      `§a "word",§"1"2",§a",§"b`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	LazyQuotes: true,
}, {
	Name:       "BareQuotes",
	Input:      `§a "word",§"1"2",§a"`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	LazyQuotes: true,
}, {
	Name:       "BareDoubleQuotes",
	Input:      `§a""b,§c`,
	Output:     [][]string{{`a""b`, `c`}},
	LazyQuotes: true,
}, {
	Name:   "BadDoubleQuotes",
	Input:  `§a∑""b,c`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:             "TrimQuote",
	Input:            ` §"a",§" b",§c`,
	Output:           [][]string{{"a", " b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "BadBareQuote",
	Input:  `§a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "BadTrailingQuote",
	Input:  `§"a word",b∑"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "ExtraneousQuote",
	Input:  `§"a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	// Output still lists the record with the wrong field count: TestRead
	// treats ErrFieldCount as non-fatal and compares the record returned by Read.
	Name:               "BadFieldCount",
	Input:              "§a,§b,§c\n¶∑§d,§e",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCountMultiple",
	Input:              "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCount1",
	Input:              `§∑a,§b,§c`,
	Errors:             []error{&ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    2,
}, {
	Name:   "FieldCount",
	Input:  "§a,§b,§c\n¶§d,§e",
	Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, {
	Name:   "TrailingCommaEOF",
	Input:  "§a,§b,§c,§",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:   "TrailingCommaEOL",
	Input:  "§a,§b,§c,§\n",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:             "TrailingCommaSpaceEOF",
	Input:            "§a,§b,§c, §",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaSpaceEOL",
	Input:            "§a,§b,§c, §\n",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaLine3",
	Input:            "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
	Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	TrimLeadingSpace: true,
}, {
	Name:   "NotTrailingComma3",
	Input:  "§a,§b,§c,§ \n",
	Output: [][]string{{"a", "b", "c", " "}},
}, {
	Name: "CommaFieldTest",
	Input: `§x,§y,§z,§w
¶§x,§y,§z,§
¶§x,§y,§,§
¶§x,§,§,§
¶§,§,§,§
¶§"x",§"y",§"z",§"w"
¶§"x",§"y",§"z",§""
¶§"x",§"y",§"",§""
¶§"x",§"",§"",§""
¶§"",§"",§"",§""
`,
	Output: [][]string{
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
	},
}, {
	Name:  "TrailingCommaIneffective1",
	Input: "§a,§b,§\n¶§c,§d,§e",
	Output: [][]string{
		{"a", "b", ""},
		{"c", "d", "e"},
	},
	TrimLeadingSpace: true,
}, {
	Name:  "ReadAllReuseRecord",
	Input: "§a,§b\n¶§c,§d",
	Output: [][]string{
		{"a", "b"},
		{"c", "d"},
	},
	ReuseRecord: true,
}, {
	Name:   "StartLine1", // Issue 19019
	Input:  "§a,\"b\nc∑\"d,e",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "StartLine2",
	Input:  "§a,§b\n¶§\"d\n\n,e∑",
	Errors: []error{nil, &ParseError{Err: ErrQuote}},
	Output: [][]string{{"a", "b"}},
}, {
	Name:  "CRLFInQuotedField", // Issue 21201
	Input: "§A,§\"Hello\r\nHi\",§B\r\n",
	Output: [][]string{
		{"A", "Hello\nHi", "B"},
	},
}, {
	Name:   "BinaryBlobField", // Issue 19410
	Input:  "§x09\x41\xb4\x1c,§aktau",
	Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
}, {
	Name:   "TrailingCR",
	Input:  "§field1,§field2\r",
	Output: [][]string{{"field1", "field2"}},
}, {
	Name:   "QuotedTrailingCR",
	Input:  "§\"field\"\r",
	Output: [][]string{{"field"}},
}, {
	Name:   "QuotedTrailingCRCR",
	Input:  "§\"field∑\"\r\r",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "FieldCR",
	Input:  "§field\rfield\r",
	Output: [][]string{{"field\rfield"}},
}, {
	Name:   "FieldCRCR",
	Input:  "§field\r\rfield\r\r",
	Output: [][]string{{"field\r\rfield\r"}},
}, {
	Name:   "FieldCRCRLF",
	Input:  "§field\r\r\n¶§field\r\r\n",
	Output: [][]string{{"field\r"}, {"field\r"}},
}, {
	Name:   "FieldCRCRLFCR",
	Input:  "§field\r\r\n¶§\rfield\r\r\n\r",
	Output: [][]string{{"field\r"}, {"\rfield\r"}},
}, {
	Name:   "FieldCRCRLFCRCR",
	Input:  "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
	Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
}, {
	Name:  "MultiFieldCRCRLFCRCR",
	Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
	Output: [][]string{
		{"field1", "field2\r"},
		{"\r\rfield1", "field2\r"},
		{"\r\r", ""},
	},
}, {
	Name:             "NonASCIICommaAndComment",
	Input:            "§a£§b,c£ \t§d,e\n€ comment\n",
	Output:           [][]string{{"a", "b,c", "d,e"}},
	TrimLeadingSpace: true,
	Comma:            '£',
	Comment:          '€',
}, {
	Name:    "NonASCIICommaAndCommentWithQuotes",
	Input:   "§a€§\"	b,\"€§ c\nλ comment\n",
	Output:  [][]string{{"a", "	b,", " c"}},
	Comma:   '€',
	Comment: 'λ',
}, {
	// λ and θ start with the same byte.
	// This tests that the parser doesn't confuse such characters.
	Name:    "NonASCIICommaConfusion",
	Input:   "§\"abθcd\"λ§efθgh",
	Output:  [][]string{{"abθcd", "efθgh"}},
	Comma:   'λ',
	Comment: '€',
}, {
	Name:    "NonASCIICommentConfusion",
	Input:   "§λ\n¶§λ\nθ\n¶§λ\n",
	Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
	Comment: 'θ',
}, {
	Name:   "QuotedFieldMultipleLF",
	Input:  "§\"\n\n\n\n\"",
	Output: [][]string{{"\n\n\n\n"}},
}, {
	// No Output and no Errors: the input is expected to yield no records.
	Name:  "MultipleCRLF",
	Input: "\r\n\r\n\r\n\r\n",
}, {
	// The implementation may read each line in several chunks if it doesn't fit entirely
	// in the read buffer, so we should test the code to handle that condition.
	Name:    "HugeLines",
	Input:   strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
	Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
	Comment: '#',
}, {
	Name:   "QuoteWithTrailingCRLF",
	Input:  "§\"foo∑\"bar\"\r\n",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyQuoteWithTrailingCRLF",
	Input:      "§\"foo\"bar\"\r\n",
	Output:     [][]string{{`foo"bar`}},
	LazyQuotes: true,
}, {
	Name:   "DoubleQuoteWithTrailingCRLF",
	Input:  "§\"foo\"\"bar\"\r\n",
	Output: [][]string{{`foo"bar`}},
}, {
	Name:   "EvenQuotes",
	Input:  `§""""""""`,
	Output: [][]string{{`"""`}},
}, {
	Name:   "OddQuotes",
	Input:  `§"""""""∑`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyOddQuotes",
	Input:      `§"""""""`,
	Output:     [][]string{{`"""`}},
	LazyQuotes: true,
}, {
	// The remaining cases supply no Input: each sets only Comma and/or
	// Comment and expects errInvalidDelim.
	Name:   "BadComma1",
	Comma:  '\n',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma2",
	Comma:  '\r',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma3",
	Comma:  '"',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma4",
	Comma:  utf8.RuneError,
	Errors: []error{errInvalidDelim},
}, {
	Name:    "BadComment1",
	Comment: '\n',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment2",
	Comment: '\r',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment3",
	Comment: utf8.RuneError,
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadCommaComment",
	Comma:   'X',
	Comment: 'X',
	Errors:  []error{errInvalidDelim},
}}
	 405  
	 406  func TestRead(t *testing.T) {
	 407  	newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) {
	 408  		positions, errPositions, input := makePositions(tt.Input)
	 409  		r := NewReader(strings.NewReader(input))
	 410  
	 411  		if tt.Comma != 0 {
	 412  			r.Comma = tt.Comma
	 413  		}
	 414  		r.Comment = tt.Comment
	 415  		if tt.UseFieldsPerRecord {
	 416  			r.FieldsPerRecord = tt.FieldsPerRecord
	 417  		} else {
	 418  			r.FieldsPerRecord = -1
	 419  		}
	 420  		r.LazyQuotes = tt.LazyQuotes
	 421  		r.TrimLeadingSpace = tt.TrimLeadingSpace
	 422  		r.ReuseRecord = tt.ReuseRecord
	 423  		return r, positions, errPositions
	 424  	}
	 425  
	 426  	for _, tt := range readTests {
	 427  		t.Run(tt.Name, func(t *testing.T) {
	 428  			r, positions, errPositions := newReader(tt)
	 429  			out, err := r.ReadAll()
	 430  			if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
	 431  				if !reflect.DeepEqual(err, wantErr) {
	 432  					t.Fatalf("ReadAll() error mismatch:\ngot	%v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
	 433  				}
	 434  				if out != nil {
	 435  					t.Fatalf("ReadAll() output:\ngot	%q\nwant nil", out)
	 436  				}
	 437  			} else {
	 438  				if err != nil {
	 439  					t.Fatalf("unexpected Readall() error: %v", err)
	 440  				}
	 441  				if !reflect.DeepEqual(out, tt.Output) {
	 442  					t.Fatalf("ReadAll() output:\ngot	%q\nwant %q", out, tt.Output)
	 443  				}
	 444  			}
	 445  
	 446  			// Check field and error positions.
	 447  			r, _, _ = newReader(tt)
	 448  			for recNum := 0; ; recNum++ {
	 449  				rec, err := r.Read()
	 450  				var wantErr error
	 451  				if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
	 452  					wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
	 453  				} else if recNum >= len(tt.Output) {
	 454  					wantErr = io.EOF
	 455  				}
	 456  				if !reflect.DeepEqual(err, wantErr) {
	 457  					t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
	 458  				}
	 459  				// ErrFieldCount is explicitly non-fatal.
	 460  				if err != nil && !errors.Is(err, ErrFieldCount) {
	 461  					if recNum < len(tt.Output) {
	 462  						t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
	 463  					}
	 464  					break
	 465  				}
	 466  				if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) {
	 467  					t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
	 468  				}
	 469  				pos := positions[recNum]
	 470  				if len(pos) != len(rec) {
	 471  					t.Fatalf("mismatched position length at record %d", recNum)
	 472  				}
	 473  				for i := range rec {
	 474  					line, col := r.FieldPos(i)
	 475  					if got, want := [2]int{line, col}, pos[i]; got != want {
	 476  						t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
	 477  					}
	 478  				}
	 479  			}
	 480  		})
	 481  	}
	 482  }
	 483  
	 484  // firstError returns the first non-nil error in errs,
	 485  // with the position adjusted according to the error's
	 486  // index inside positions.
	 487  func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
	 488  	for i, err := range errs {
	 489  		if err != nil {
	 490  			return errorWithPosition(err, i, positions, errPositions)
	 491  		}
	 492  	}
	 493  	return nil
	 494  }
	 495  
	 496  func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
	 497  	parseErr, ok := err.(*ParseError)
	 498  	if !ok {
	 499  		return err
	 500  	}
	 501  	if recNum >= len(positions) {
	 502  		panic(fmt.Errorf("no positions found for error at record %d", recNum))
	 503  	}
	 504  	errPos, ok := errPositions[recNum]
	 505  	if !ok {
	 506  		panic(fmt.Errorf("no error position found for error at record %d", recNum))
	 507  	}
	 508  	parseErr1 := *parseErr
	 509  	parseErr1.StartLine = positions[recNum][0][0]
	 510  	parseErr1.Line = errPos[0]
	 511  	parseErr1.Column = errPos[1]
	 512  	return &parseErr1
	 513  }
	 514  
	 515  // makePositions returns the expected field positions of all
	 516  // the fields in text, the positions of any errors, and the text with the position markers
	 517  // removed.
	 518  //
	 519  // The start of each field is marked with a § symbol;
	 520  // CSV lines are separated by ¶ symbols;
	 521  // Error positions are marked with ∑ symbols.
	 522  func makePositions(text string) ([][][2]int, map[int][2]int, string) {
	 523  	buf := make([]byte, 0, len(text))
	 524  	var positions [][][2]int
	 525  	errPositions := make(map[int][2]int)
	 526  	line, col := 1, 1
	 527  	recNum := 0
	 528  
	 529  	for len(text) > 0 {
	 530  		r, size := utf8.DecodeRuneInString(text)
	 531  		switch r {
	 532  		case '\n':
	 533  			line++
	 534  			col = 1
	 535  			buf = append(buf, '\n')
	 536  		case '§':
	 537  			if len(positions) == 0 {
	 538  				positions = append(positions, [][2]int{})
	 539  			}
	 540  			positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
	 541  		case '¶':
	 542  			positions = append(positions, [][2]int{})
	 543  			recNum++
	 544  		case '∑':
	 545  			errPositions[recNum] = [2]int{line, col}
	 546  		default:
	 547  			buf = append(buf, text[:size]...)
	 548  			col += size
	 549  		}
	 550  		text = text[size:]
	 551  	}
	 552  	return positions, errPositions, string(buf)
	 553  }
	 554  
	 555  // nTimes is an io.Reader which yields the string s n times.
	 556  type nTimes struct {
	 557  	s	 string
	 558  	n	 int
	 559  	off int
	 560  }
	 561  
	 562  func (r *nTimes) Read(p []byte) (n int, err error) {
	 563  	for {
	 564  		if r.n <= 0 || r.s == "" {
	 565  			return n, io.EOF
	 566  		}
	 567  		n0 := copy(p, r.s[r.off:])
	 568  		p = p[n0:]
	 569  		n += n0
	 570  		r.off += n0
	 571  		if r.off == len(r.s) {
	 572  			r.off = 0
	 573  			r.n--
	 574  		}
	 575  		if len(p) == 0 {
	 576  			return
	 577  		}
	 578  	}
	 579  }
	 580  
	 581  // benchmarkRead measures reading the provided CSV rows data.
	 582  // initReader, if non-nil, modifies the Reader before it's used.
	 583  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
	 584  	b.ReportAllocs()
	 585  	r := NewReader(&nTimes{s: rows, n: b.N})
	 586  	if initReader != nil {
	 587  		initReader(r)
	 588  	}
	 589  	for {
	 590  		_, err := r.Read()
	 591  		if err == io.EOF {
	 592  			break
	 593  		}
	 594  		if err != nil {
	 595  			b.Fatal(err)
	 596  		}
	 597  	}
	 598  }
	 599  
// benchmarkCSVData is the input shared by the small-field Read benchmarks:
// ten four-field rows, five unquoted followed by the same five with every
// field quoted, each set running from all fields filled to all fields empty.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
	 611  
	 612  func BenchmarkRead(b *testing.B) {
	 613  	benchmarkRead(b, nil, benchmarkCSVData)
	 614  }
	 615  
	 616  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
	 617  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
	 618  }
	 619  
	 620  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
	 621  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
	 622  }
	 623  
	 624  func BenchmarkReadLargeFields(b *testing.B) {
	 625  	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 626  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
	 627  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 628  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 629  `, 3))
	 630  }
	 631  
	 632  func BenchmarkReadReuseRecord(b *testing.B) {
	 633  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
	 634  }
	 635  
	 636  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
	 637  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
	 638  }
	 639  
	 640  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
	 641  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
	 642  }
	 643  
	 644  func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
	 645  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 646  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
	 647  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 648  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	 649  `, 3))
	 650  }
	 651  

View as plain text