1
2
3
4
5 package tar
6
7 import (
8 "bytes"
9 "io"
10 "strconv"
11 "strings"
12 "time"
13 )
14
15
16
17
18 type Reader struct {
19 r io.Reader
20 pad int64
21 curr fileReader
22 blk block
23
24
25
26
27 err error
28 }
29
30 type fileReader interface {
31 io.Reader
32 fileState
33
34 WriteTo(io.Writer) (int64, error)
35 }
36
37
38 func NewReader(r io.Reader) *Reader {
39 return &Reader{r: r, curr: ®FileReader{r, 0}}
40 }
41
42
43
44
45
46
47 func (tr *Reader) Next() (*Header, error) {
48 if tr.err != nil {
49 return nil, tr.err
50 }
51 hdr, err := tr.next()
52 tr.err = err
53 return hdr, err
54 }
55
56 func (tr *Reader) next() (*Header, error) {
57 var paxHdrs map[string]string
58 var gnuLongName, gnuLongLink string
59
60
61
62
63
64
65 format := FormatUSTAR | FormatPAX | FormatGNU
66 for {
67
68 if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
69 return nil, err
70 }
71 if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
72 return nil, err
73 }
74 tr.pad = 0
75
76 hdr, rawHdr, err := tr.readHeader()
77 if err != nil {
78 return nil, err
79 }
80 if err := tr.handleRegularFile(hdr); err != nil {
81 return nil, err
82 }
83 format.mayOnlyBe(hdr.Format)
84
85
86 switch hdr.Typeflag {
87 case TypeXHeader, TypeXGlobalHeader:
88 format.mayOnlyBe(FormatPAX)
89 paxHdrs, err = parsePAX(tr)
90 if err != nil {
91 return nil, err
92 }
93 if hdr.Typeflag == TypeXGlobalHeader {
94 mergePAX(hdr, paxHdrs)
95 return &Header{
96 Name: hdr.Name,
97 Typeflag: hdr.Typeflag,
98 Xattrs: hdr.Xattrs,
99 PAXRecords: hdr.PAXRecords,
100 Format: format,
101 }, nil
102 }
103 continue
104 case TypeGNULongName, TypeGNULongLink:
105 format.mayOnlyBe(FormatGNU)
106 realname, err := io.ReadAll(tr)
107 if err != nil {
108 return nil, err
109 }
110
111 var p parser
112 switch hdr.Typeflag {
113 case TypeGNULongName:
114 gnuLongName = p.parseString(realname)
115 case TypeGNULongLink:
116 gnuLongLink = p.parseString(realname)
117 }
118 continue
119 default:
120
121
122
123 if err := mergePAX(hdr, paxHdrs); err != nil {
124 return nil, err
125 }
126 if gnuLongName != "" {
127 hdr.Name = gnuLongName
128 }
129 if gnuLongLink != "" {
130 hdr.Linkname = gnuLongLink
131 }
132 if hdr.Typeflag == TypeRegA {
133 if strings.HasSuffix(hdr.Name, "/") {
134 hdr.Typeflag = TypeDir
135 } else {
136 hdr.Typeflag = TypeReg
137 }
138 }
139
140
141
142 if err := tr.handleRegularFile(hdr); err != nil {
143 return nil, err
144 }
145
146
147
148 if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
149 return nil, err
150 }
151
152
153 if format.has(FormatUSTAR) && format.has(FormatPAX) {
154 format.mayOnlyBe(FormatUSTAR)
155 }
156 hdr.Format = format
157 return hdr, nil
158 }
159 }
160 }
161
162
163
164
165 func (tr *Reader) handleRegularFile(hdr *Header) error {
166 nb := hdr.Size
167 if isHeaderOnlyType(hdr.Typeflag) {
168 nb = 0
169 }
170 if nb < 0 {
171 return ErrHeader
172 }
173
174 tr.pad = blockPadding(nb)
175 tr.curr = ®FileReader{r: tr.r, nb: nb}
176 return nil
177 }
178
179
180
181 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
182 var spd sparseDatas
183 var err error
184 if hdr.Typeflag == TypeGNUSparse {
185 spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
186 } else {
187 spd, err = tr.readGNUSparsePAXHeaders(hdr)
188 }
189
190
191
192 if err == nil && spd != nil {
193 if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
194 return ErrHeader
195 }
196 sph := invertSparseEntries(spd, hdr.Size)
197 tr.curr = &sparseFileReader{tr.curr, sph, 0}
198 }
199 return err
200 }
201
202
203
204
205
206 func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
207
208 var is1x0 bool
209 major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
210 switch {
211 case major == "0" && (minor == "0" || minor == "1"):
212 is1x0 = false
213 case major == "1" && minor == "0":
214 is1x0 = true
215 case major != "" || minor != "":
216 return nil, nil
217 case hdr.PAXRecords[paxGNUSparseMap] != "":
218 is1x0 = false
219 default:
220 return nil, nil
221 }
222 hdr.Format.mayOnlyBe(FormatPAX)
223
224
225 if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
226 hdr.Name = name
227 }
228 size := hdr.PAXRecords[paxGNUSparseSize]
229 if size == "" {
230 size = hdr.PAXRecords[paxGNUSparseRealSize]
231 }
232 if size != "" {
233 n, err := strconv.ParseInt(size, 10, 64)
234 if err != nil {
235 return nil, ErrHeader
236 }
237 hdr.Size = n
238 }
239
240
241 if is1x0 {
242 return readGNUSparseMap1x0(tr.curr)
243 }
244 return readGNUSparseMap0x1(hdr.PAXRecords)
245 }
246
247
248 func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
249 for k, v := range paxHdrs {
250 if v == "" {
251 continue
252 }
253 var id64 int64
254 switch k {
255 case paxPath:
256 hdr.Name = v
257 case paxLinkpath:
258 hdr.Linkname = v
259 case paxUname:
260 hdr.Uname = v
261 case paxGname:
262 hdr.Gname = v
263 case paxUid:
264 id64, err = strconv.ParseInt(v, 10, 64)
265 hdr.Uid = int(id64)
266 case paxGid:
267 id64, err = strconv.ParseInt(v, 10, 64)
268 hdr.Gid = int(id64)
269 case paxAtime:
270 hdr.AccessTime, err = parsePAXTime(v)
271 case paxMtime:
272 hdr.ModTime, err = parsePAXTime(v)
273 case paxCtime:
274 hdr.ChangeTime, err = parsePAXTime(v)
275 case paxSize:
276 hdr.Size, err = strconv.ParseInt(v, 10, 64)
277 default:
278 if strings.HasPrefix(k, paxSchilyXattr) {
279 if hdr.Xattrs == nil {
280 hdr.Xattrs = make(map[string]string)
281 }
282 hdr.Xattrs[k[len(paxSchilyXattr):]] = v
283 }
284 }
285 if err != nil {
286 return ErrHeader
287 }
288 }
289 hdr.PAXRecords = paxHdrs
290 return nil
291 }
292
293
294
295 func parsePAX(r io.Reader) (map[string]string, error) {
296 buf, err := io.ReadAll(r)
297 if err != nil {
298 return nil, err
299 }
300 sbuf := string(buf)
301
302
303
304
305 var sparseMap []string
306
307 paxHdrs := make(map[string]string)
308 for len(sbuf) > 0 {
309 key, value, residual, err := parsePAXRecord(sbuf)
310 if err != nil {
311 return nil, ErrHeader
312 }
313 sbuf = residual
314
315 switch key {
316 case paxGNUSparseOffset, paxGNUSparseNumBytes:
317
318 if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
319 (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
320 strings.Contains(value, ",") {
321 return nil, ErrHeader
322 }
323 sparseMap = append(sparseMap, value)
324 default:
325 paxHdrs[key] = value
326 }
327 }
328 if len(sparseMap) > 0 {
329 paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
330 }
331 return paxHdrs, nil
332 }
333
334
335
336
337
338
339
340
341
342 func (tr *Reader) readHeader() (*Header, *block, error) {
343
344 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
345 return nil, nil, err
346 }
347 if bytes.Equal(tr.blk[:], zeroBlock[:]) {
348 if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
349 return nil, nil, err
350 }
351 if bytes.Equal(tr.blk[:], zeroBlock[:]) {
352 return nil, nil, io.EOF
353 }
354 return nil, nil, ErrHeader
355 }
356
357
358 format := tr.blk.GetFormat()
359 if format == FormatUnknown {
360 return nil, nil, ErrHeader
361 }
362
363 var p parser
364 hdr := new(Header)
365
366
367 v7 := tr.blk.V7()
368 hdr.Typeflag = v7.TypeFlag()[0]
369 hdr.Name = p.parseString(v7.Name())
370 hdr.Linkname = p.parseString(v7.LinkName())
371 hdr.Size = p.parseNumeric(v7.Size())
372 hdr.Mode = p.parseNumeric(v7.Mode())
373 hdr.Uid = int(p.parseNumeric(v7.UID()))
374 hdr.Gid = int(p.parseNumeric(v7.GID()))
375 hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
376
377
378 if format > formatV7 {
379 ustar := tr.blk.USTAR()
380 hdr.Uname = p.parseString(ustar.UserName())
381 hdr.Gname = p.parseString(ustar.GroupName())
382 hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
383 hdr.Devminor = p.parseNumeric(ustar.DevMinor())
384
385 var prefix string
386 switch {
387 case format.has(FormatUSTAR | FormatPAX):
388 hdr.Format = format
389 ustar := tr.blk.USTAR()
390 prefix = p.parseString(ustar.Prefix())
391
392
393
394 notASCII := func(r rune) bool { return r >= 0x80 }
395 if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
396 hdr.Format = FormatUnknown
397 }
398 nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
399 if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
400 nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
401 hdr.Format = FormatUnknown
402 }
403 case format.has(formatSTAR):
404 star := tr.blk.STAR()
405 prefix = p.parseString(star.Prefix())
406 hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
407 hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
408 case format.has(FormatGNU):
409 hdr.Format = format
410 var p2 parser
411 gnu := tr.blk.GNU()
412 if b := gnu.AccessTime(); b[0] != 0 {
413 hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
414 }
415 if b := gnu.ChangeTime(); b[0] != 0 {
416 hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
417 }
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440 if p2.err != nil {
441 hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
442 ustar := tr.blk.USTAR()
443 if s := p.parseString(ustar.Prefix()); isASCII(s) {
444 prefix = s
445 }
446 hdr.Format = FormatUnknown
447 }
448 }
449 if len(prefix) > 0 {
450 hdr.Name = prefix + "/" + hdr.Name
451 }
452 }
453 return hdr, &tr.blk, p.err
454 }
455
456
457
458
459
460
461
462
463
464 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
465
466
467
468 if blk.GetFormat() != FormatGNU {
469 return nil, ErrHeader
470 }
471 hdr.Format.mayOnlyBe(FormatGNU)
472
473 var p parser
474 hdr.Size = p.parseNumeric(blk.GNU().RealSize())
475 if p.err != nil {
476 return nil, p.err
477 }
478 s := blk.GNU().Sparse()
479 spd := make(sparseDatas, 0, s.MaxEntries())
480 for {
481 for i := 0; i < s.MaxEntries(); i++ {
482
483 if s.Entry(i).Offset()[0] == 0x00 {
484 break
485 }
486 offset := p.parseNumeric(s.Entry(i).Offset())
487 length := p.parseNumeric(s.Entry(i).Length())
488 if p.err != nil {
489 return nil, p.err
490 }
491 spd = append(spd, sparseEntry{Offset: offset, Length: length})
492 }
493
494 if s.IsExtended()[0] > 0 {
495
496 if _, err := mustReadFull(tr.r, blk[:]); err != nil {
497 return nil, err
498 }
499 s = blk.Sparse()
500 continue
501 }
502 return spd, nil
503 }
504 }
505
506
507
508
509
510
511
512
513
514
515
516 func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
517 var (
518 cntNewline int64
519 buf bytes.Buffer
520 blk block
521 )
522
523
524
525 feedTokens := func(n int64) error {
526 for cntNewline < n {
527 if _, err := mustReadFull(r, blk[:]); err != nil {
528 return err
529 }
530 buf.Write(blk[:])
531 for _, c := range blk {
532 if c == '\n' {
533 cntNewline++
534 }
535 }
536 }
537 return nil
538 }
539
540
541
542 nextToken := func() string {
543 cntNewline--
544 tok, _ := buf.ReadString('\n')
545 return strings.TrimRight(tok, "\n")
546 }
547
548
549
550 if err := feedTokens(1); err != nil {
551 return nil, err
552 }
553 numEntries, err := strconv.ParseInt(nextToken(), 10, 0)
554 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
555 return nil, ErrHeader
556 }
557
558
559
560
561 if err := feedTokens(2 * numEntries); err != nil {
562 return nil, err
563 }
564 spd := make(sparseDatas, 0, numEntries)
565 for i := int64(0); i < numEntries; i++ {
566 offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
567 length, err2 := strconv.ParseInt(nextToken(), 10, 64)
568 if err1 != nil || err2 != nil {
569 return nil, ErrHeader
570 }
571 spd = append(spd, sparseEntry{Offset: offset, Length: length})
572 }
573 return spd, nil
574 }
575
576
577
578 func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
579
580
581 numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
582 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
583 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
584 return nil, ErrHeader
585 }
586
587
588 sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
589 if len(sparseMap) == 1 && sparseMap[0] == "" {
590 sparseMap = sparseMap[:0]
591 }
592 if int64(len(sparseMap)) != 2*numEntries {
593 return nil, ErrHeader
594 }
595
596
597
598 spd := make(sparseDatas, 0, numEntries)
599 for len(sparseMap) >= 2 {
600 offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
601 length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
602 if err1 != nil || err2 != nil {
603 return nil, ErrHeader
604 }
605 spd = append(spd, sparseEntry{Offset: offset, Length: length})
606 sparseMap = sparseMap[2:]
607 }
608 return spd, nil
609 }
610
611
612
613
614
615
616
617
618
619
620
621 func (tr *Reader) Read(b []byte) (int, error) {
622 if tr.err != nil {
623 return 0, tr.err
624 }
625 n, err := tr.curr.Read(b)
626 if err != nil && err != io.EOF {
627 tr.err = err
628 }
629 return n, err
630 }
631
632
633
634
635
636
637
638
639
640
641
642 func (tr *Reader) writeTo(w io.Writer) (int64, error) {
643 if tr.err != nil {
644 return 0, tr.err
645 }
646 n, err := tr.curr.WriteTo(w)
647 if err != nil {
648 tr.err = err
649 }
650 return n, err
651 }
652
653
// regFileReader reads the data of a regular entry: at most nb more bytes
// taken from r.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // bytes of the entry still to be read
}

// Read reads up to len(b) bytes, never going past the end of the entry.
// It returns io.EOF once the entry is fully consumed, together with the final
// bytes when they arrive in the same call. It returns io.ErrUnexpectedEOF if
// the underlying reader ends while the entry still has bytes outstanding.
func (fr *regFileReader) Read(b []byte) (n int, err error) {
	if left := fr.nb; int64(len(b)) > left {
		b = b[:left]
	}
	if len(b) != 0 {
		n, err = fr.r.Read(b)
		fr.nb -= int64(n)
	}

	if err == io.EOF && fr.nb > 0 {
		return n, io.ErrUnexpectedEOF
	}
	if err == nil && fr.nb == 0 {
		return n, io.EOF
	}
	return n, err
}

// WriteTo copies the rest of the entry to w. The receiver is wrapped in an
// anonymous struct that exposes only Read, so io.Copy does not see this
// WriteTo method and call it again.
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
	readOnly := struct{ io.Reader }{fr}
	return io.Copy(w, readOnly)
}

// LogicalRemaining reports the number of bytes of the entry not yet read.
func (fr regFileReader) LogicalRemaining() int64 {
	return fr.nb
}

// PhysicalRemaining reports the number of bytes of the entry not yet read.
// For a regular entry it is the same value as LogicalRemaining.
func (fr regFileReader) PhysicalRemaining() int64 {
	return fr.nb
}
688
689
690 type sparseFileReader struct {
691 fr fileReader
692 sp sparseHoles
693 pos int64
694 }
695
696 func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
697 finished := int64(len(b)) >= sr.LogicalRemaining()
698 if finished {
699 b = b[:sr.LogicalRemaining()]
700 }
701
702 b0 := b
703 endPos := sr.pos + int64(len(b))
704 for endPos > sr.pos && err == nil {
705 var nf int
706 holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
707 if sr.pos < holeStart {
708 bf := b[:min(int64(len(b)), holeStart-sr.pos)]
709 nf, err = tryReadFull(sr.fr, bf)
710 } else {
711 bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
712 nf, err = tryReadFull(zeroReader{}, bf)
713 }
714 b = b[nf:]
715 sr.pos += int64(nf)
716 if sr.pos >= holeEnd && len(sr.sp) > 1 {
717 sr.sp = sr.sp[1:]
718 }
719 }
720
721 n = len(b0) - len(b)
722 switch {
723 case err == io.EOF:
724 return n, errMissData
725 case err != nil:
726 return n, err
727 case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
728 return n, errUnrefData
729 case finished:
730 return n, io.EOF
731 default:
732 return n, nil
733 }
734 }
735
736 func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
737 ws, ok := w.(io.WriteSeeker)
738 if ok {
739 if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
740 ok = false
741 }
742 }
743 if !ok {
744 return io.Copy(w, struct{ io.Reader }{sr})
745 }
746
747 var writeLastByte bool
748 pos0 := sr.pos
749 for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
750 var nf int64
751 holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
752 if sr.pos < holeStart {
753 nf = holeStart - sr.pos
754 nf, err = io.CopyN(ws, sr.fr, nf)
755 } else {
756 nf = holeEnd - sr.pos
757 if sr.PhysicalRemaining() == 0 {
758 writeLastByte = true
759 nf--
760 }
761 _, err = ws.Seek(nf, io.SeekCurrent)
762 }
763 sr.pos += nf
764 if sr.pos >= holeEnd && len(sr.sp) > 1 {
765 sr.sp = sr.sp[1:]
766 }
767 }
768
769
770
771 if writeLastByte && err == nil {
772 _, err = ws.Write([]byte{0})
773 sr.pos++
774 }
775
776 n = sr.pos - pos0
777 switch {
778 case err == io.EOF:
779 return n, errMissData
780 case err != nil:
781 return n, err
782 case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
783 return n, errUnrefData
784 default:
785 return n, nil
786 }
787 }
788
789 func (sr sparseFileReader) LogicalRemaining() int64 {
790 return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
791 }
792 func (sr sparseFileReader) PhysicalRemaining() int64 {
793 return sr.fr.PhysicalRemaining()
794 }
795
// zeroReader is an endless source of zero bytes.
type zeroReader struct{}

// Read overwrites all of b with zeros and reports len(b) bytes read.
// It never returns an error.
func (zeroReader) Read(b []byte) (int, error) {
	for i := 0; i < len(b); i++ {
		b[i] = 0
	}
	return len(b), nil
}
804
805
806
807 func mustReadFull(r io.Reader, b []byte) (int, error) {
808 n, err := tryReadFull(r, b)
809 if err == io.EOF {
810 err = io.ErrUnexpectedEOF
811 }
812 return n, err
813 }
814
815
816
// tryReadFull reads from r until b is full or a read returns an error.
// It returns the number of bytes read and that error. An io.EOF that arrives
// once b is already full is dropped, so a completely filled buffer never
// reports io.EOF. With an empty b, r is not read at all.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) {
		nn, readErr := r.Read(b[n:])
		n += nn
		if readErr != nil {
			err = readErr
			break
		}
	}
	if err == io.EOF && n == len(b) {
		return n, nil
	}
	return n, err
}
828
829
// discard skips n bytes of r. It returns io.ErrUnexpectedEOF if r ends
// before n bytes have been skipped.
//
// When r is an io.Seeker and n > 1, all but the last byte are skipped with
// Seek, and the last byte is then read for real, so reaching the end of the
// input shows up as a read error. Seek support is probed first with a no-op
// seek; if the probe fails, everything is skipped by reading.
func discard(r io.Reader, n int64) error {
	var seekSkipped int64 // bytes skipped by seeking
	if seeker, ok := r.(io.Seeker); ok && n > 1 {
		if start, err := seeker.Seek(0, io.SeekCurrent); err == nil && start >= 0 {
			end, err := seeker.Seek(n-1, io.SeekCurrent)
			if err != nil || end < 0 {
				return err
			}
			seekSkipped = end - start
		}
	}

	copySkipped, err := io.CopyN(io.Discard, r, n-seekSkipped)
	if err == io.EOF && seekSkipped+copySkipped < n {
		return io.ErrUnexpectedEOF
	}
	return err
}
858
View as plain text