1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
// A ParseError describes a problem found while parsing a record. It wraps
// the underlying cause together with the location at which it was detected.
type ParseError struct {
	// StartLine is the line on which the offending record began.
	StartLine int

	// Line is the line on which the problem was detected. It differs from
	// StartLine when a quoted field spans several lines.
	Line int

	// Column is the 1-based, byte-oriented column at which the problem
	// was detected.
	Column int

	// Err is the underlying cause.
	Err error
}
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83 func (e *ParseError) Unwrap() error { return e.Err }
84
85
// Sentinel errors that may appear in ParseError.Err.
var (
	// ErrTrailingComma is not returned by any code visible in this file;
	// presumably it is kept so existing callers still compile (confirm).
	ErrTrailingComma = errors.New(`extra delimiter at end of line`)

	// ErrBareQuote reports a '"' inside an unquoted field while
	// LazyQuotes is off.
	ErrBareQuote = errors.New(`bare " in non-quoted-field`)

	// ErrQuote reports a stray or unterminated '"' inside a quoted field
	// while LazyQuotes is off.
	ErrQuote = errors.New(`extraneous or missing " in quoted-field`)

	// ErrFieldCount reports a record whose number of fields differs from
	// a positive Reader.FieldsPerRecord.
	ErrFieldCount = errors.New(`wrong number of fields`)
)
92
// errInvalidDelim is returned by readRecord when Comma or Comment is not an
// acceptable delimiter, or when the two are equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
94
// validDelim reports whether r may be used as a field or comment delimiter.
// NUL, the double quote, carriage return, line feed and the Unicode
// replacement character are rejected, as is any value that is not a valid
// rune.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
98
99
100
101
102
103
104
105
106
107
108 type Reader struct {
109
110
111
112
113 Comma rune
114
115
116
117
118
119
120
121
122 Comment rune
123
124
125
126
127
128
129
130 FieldsPerRecord int
131
132
133
134 LazyQuotes bool
135
136
137
138 TrimLeadingSpace bool
139
140
141
142
143 ReuseRecord bool
144
145 TrailingComma bool
146
147 r *bufio.Reader
148
149
150 numLine int
151
152
153 rawBuffer []byte
154
155
156
157
158
159 recordBuffer []byte
160
161
162
163 fieldIndexes []int
164
165
166
167 fieldPositions []position
168
169
170 lastRecord []string
171 }
172
173
174 func NewReader(r io.Reader) *Reader {
175 return &Reader{
176 Comma: ',',
177 r: bufio.NewReader(r),
178 }
179 }
180
181
182
183
184
185
186
187
188
189 func (r *Reader) Read() (record []string, err error) {
190 if r.ReuseRecord {
191 record, err = r.readRecord(r.lastRecord)
192 r.lastRecord = record
193 } else {
194 record, err = r.readRecord(nil)
195 }
196 return record, err
197 }
198
199
200
201
202
203
204
205 func (r *Reader) FieldPos(field int) (line, column int) {
206 if field < 0 || field >= len(r.fieldPositions) {
207 panic("out of range index passed to FieldPos")
208 }
209 p := &r.fieldPositions[field]
210 return p.line, p.col
211 }
212
213
// position is a location in the input, used to remember where each field
// of a record started.
type position struct {
	// line is the input line number.
	line int

	// col is the 1-based column, counted in bytes.
	col int
}
217
218
219
220
221
222
223 func (r *Reader) ReadAll() (records [][]string, err error) {
224 for {
225 record, err := r.readRecord(nil)
226 if err == io.EOF {
227 return records, nil
228 }
229 if err != nil {
230 return nil, err
231 }
232 records = append(records, record)
233 }
234 }
235
236
237
238
239
240 func (r *Reader) readLine() ([]byte, error) {
241 line, err := r.r.ReadSlice('\n')
242 if err == bufio.ErrBufferFull {
243 r.rawBuffer = append(r.rawBuffer[:0], line...)
244 for err == bufio.ErrBufferFull {
245 line, err = r.r.ReadSlice('\n')
246 r.rawBuffer = append(r.rawBuffer, line...)
247 }
248 line = r.rawBuffer
249 }
250 if len(line) > 0 && err == io.EOF {
251 err = nil
252
253 if line[len(line)-1] == '\r' {
254 line = line[:len(line)-1]
255 }
256 }
257 r.numLine++
258
259 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
260 line[n-2] = '\n'
261 line = line[:n-1]
262 }
263 return line, err
264 }
265
266
// lengthNL returns 1 if b ends with a '\n' byte and 0 otherwise, i.e. the
// length of the trailing newline of b.
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
273
274
// nextRune decodes and returns the first UTF-8 encoded rune in b. For an
// empty or invalidly encoded b the result is utf8.RuneError, as returned by
// utf8.DecodeRune.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b)
	return first
}
279
280 func (r *Reader) readRecord(dst []string) ([]string, error) {
281 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
282 return nil, errInvalidDelim
283 }
284
285
286 var line []byte
287 var errRead error
288 for errRead == nil {
289 line, errRead = r.readLine()
290 if r.Comment != 0 && nextRune(line) == r.Comment {
291 line = nil
292 continue
293 }
294 if errRead == nil && len(line) == lengthNL(line) {
295 line = nil
296 continue
297 }
298 break
299 }
300 if errRead == io.EOF {
301 return nil, errRead
302 }
303
304
305 var err error
306 const quoteLen = len(`"`)
307 commaLen := utf8.RuneLen(r.Comma)
308 recLine := r.numLine
309 r.recordBuffer = r.recordBuffer[:0]
310 r.fieldIndexes = r.fieldIndexes[:0]
311 r.fieldPositions = r.fieldPositions[:0]
312 pos := position{line: r.numLine, col: 1}
313 parseField:
314 for {
315 if r.TrimLeadingSpace {
316 i := bytes.IndexFunc(line, func(r rune) bool {
317 return !unicode.IsSpace(r)
318 })
319 if i < 0 {
320 i = len(line)
321 pos.col -= lengthNL(line)
322 }
323 line = line[i:]
324 pos.col += i
325 }
326 if len(line) == 0 || line[0] != '"' {
327
328 i := bytes.IndexRune(line, r.Comma)
329 field := line
330 if i >= 0 {
331 field = field[:i]
332 } else {
333 field = field[:len(field)-lengthNL(field)]
334 }
335
336 if !r.LazyQuotes {
337 if j := bytes.IndexByte(field, '"'); j >= 0 {
338 col := pos.col + j
339 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
340 break parseField
341 }
342 }
343 r.recordBuffer = append(r.recordBuffer, field...)
344 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
345 r.fieldPositions = append(r.fieldPositions, pos)
346 if i >= 0 {
347 line = line[i+commaLen:]
348 pos.col += i + commaLen
349 continue parseField
350 }
351 break parseField
352 } else {
353
354 fieldPos := pos
355 line = line[quoteLen:]
356 pos.col += quoteLen
357 for {
358 i := bytes.IndexByte(line, '"')
359 if i >= 0 {
360
361 r.recordBuffer = append(r.recordBuffer, line[:i]...)
362 line = line[i+quoteLen:]
363 pos.col += i + quoteLen
364 switch rn := nextRune(line); {
365 case rn == '"':
366
367 r.recordBuffer = append(r.recordBuffer, '"')
368 line = line[quoteLen:]
369 pos.col += quoteLen
370 case rn == r.Comma:
371
372 line = line[commaLen:]
373 pos.col += commaLen
374 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
375 r.fieldPositions = append(r.fieldPositions, fieldPos)
376 continue parseField
377 case lengthNL(line) == len(line):
378
379 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
380 r.fieldPositions = append(r.fieldPositions, fieldPos)
381 break parseField
382 case r.LazyQuotes:
383
384 r.recordBuffer = append(r.recordBuffer, '"')
385 default:
386
387 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
388 break parseField
389 }
390 } else if len(line) > 0 {
391
392 r.recordBuffer = append(r.recordBuffer, line...)
393 if errRead != nil {
394 break parseField
395 }
396 pos.col += len(line)
397 line, errRead = r.readLine()
398 if len(line) > 0 {
399 pos.line++
400 pos.col = 1
401 }
402 if errRead == io.EOF {
403 errRead = nil
404 }
405 } else {
406
407 if !r.LazyQuotes && errRead == nil {
408 err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
409 break parseField
410 }
411 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
412 r.fieldPositions = append(r.fieldPositions, fieldPos)
413 break parseField
414 }
415 }
416 }
417 }
418 if err == nil {
419 err = errRead
420 }
421
422
423
424 str := string(r.recordBuffer)
425 dst = dst[:0]
426 if cap(dst) < len(r.fieldIndexes) {
427 dst = make([]string, len(r.fieldIndexes))
428 }
429 dst = dst[:len(r.fieldIndexes)]
430 var preIdx int
431 for i, idx := range r.fieldIndexes {
432 dst[i] = str[preIdx:idx]
433 preIdx = idx
434 }
435
436
437 if r.FieldsPerRecord > 0 {
438 if len(dst) != r.FieldsPerRecord && err == nil {
439 err = &ParseError{
440 StartLine: recLine,
441 Line: recLine,
442 Column: 1,
443 Err: ErrFieldCount,
444 }
445 }
446 } else if r.FieldsPerRecord == 0 {
447 r.FieldsPerRecord = len(dst)
448 }
449 return dst, err
450 }
451
View as plain text