1
2
3
4
5 package zip
6
7 import (
8 "bufio"
9 "encoding/binary"
10 "errors"
11 "hash"
12 "hash/crc32"
13 "io"
14 "io/fs"
15 "os"
16 "path"
17 "sort"
18 "strings"
19 "sync"
20 "time"
21 )
22
// Sentinel errors reported by this package.
var (
	// ErrFormat is returned when the data does not parse as a zip archive,
	// for example when an expected record signature is missing.
	ErrFormat = errors.New("zip: not a valid zip file")
	// ErrAlgorithm is returned by File.Open when no decompressor is
	// available for the entry's compression method.
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	// ErrChecksum is returned when the CRC-32 of the bytes read from an
	// entry does not match the recorded checksum.
	ErrChecksum = errors.New("zip: checksum error")
)
28
29
// A Reader serves content from a zip archive accessed through an
// io.ReaderAt.
type Reader struct {
	// r is the archive data handed to init.
	r io.ReaderAt
	// File lists the entries parsed from the central directory, in the
	// order they were read.
	File []*File
	// Comment is the archive comment taken from the end-of-directory record.
	Comment string
	// decompressors holds per-Reader overrides installed through
	// RegisterDecompressor; nil until the first registration.
	decompressors map[uint16]Decompressor

	// fileList is the sorted entry list used by Open. It is built on first
	// use by initFileList; fileListOnce ensures that happens only once.
	fileListOnce sync.Once
	fileList     []fileListEntry
}
41
42
// A ReadCloser is a Reader backed by an *os.File; call Close to release
// the file when done.
type ReadCloser struct {
	// f is the file opened by OpenReader and closed by Close.
	f *os.File
	Reader
}
47
48
49
50
// A File is a single entry of a zip archive. Its metadata lives in the
// embedded FileHeader; its content is obtained through Open or OpenRaw.
type File struct {
	FileHeader
	// zip is the Reader this entry belongs to; Open uses it to look up a
	// decompressor.
	zip *Reader
	// zipr is the archive data the entry's bytes are read from.
	zipr io.ReaderAt
	// headerOffset is the position of the entry's local file header in zipr.
	headerOffset int64
	// zip64 is set when the directory header carried a zip64 extra field.
	zip64 bool
	// descErr records a failure to read the data descriptor; it is surfaced
	// when the entry's content has been read to EOF.
	descErr error
}
59
60
61 func OpenReader(name string) (*ReadCloser, error) {
62 f, err := os.Open(name)
63 if err != nil {
64 return nil, err
65 }
66 fi, err := f.Stat()
67 if err != nil {
68 f.Close()
69 return nil, err
70 }
71 r := new(ReadCloser)
72 if err := r.init(f, fi.Size()); err != nil {
73 f.Close()
74 return nil, err
75 }
76 r.f = f
77 return r, nil
78 }
79
80
81
82 func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
83 if size < 0 {
84 return nil, errors.New("zip: size cannot be negative")
85 }
86 zr := new(Reader)
87 if err := zr.init(r, size); err != nil {
88 return nil, err
89 }
90 return zr, nil
91 }
92
93 func (z *Reader) init(r io.ReaderAt, size int64) error {
94 end, err := readDirectoryEnd(r, size)
95 if err != nil {
96 return err
97 }
98 z.r = r
99
100
101
102
103
104
105 if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
106 z.File = make([]*File, 0, end.directoryRecords)
107 }
108 z.Comment = end.comment
109 rs := io.NewSectionReader(r, 0, size)
110 if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
111 return err
112 }
113 buf := bufio.NewReader(rs)
114
115
116
117
118
119 for {
120 f := &File{zip: z, zipr: r}
121 err = readDirectoryHeader(f, buf)
122 if err == ErrFormat || err == io.ErrUnexpectedEOF {
123 break
124 }
125 if err != nil {
126 return err
127 }
128 f.readDataDescriptor()
129 z.File = append(z.File, f)
130 }
131 if uint16(len(z.File)) != uint16(end.directoryRecords) {
132
133
134 return err
135 }
136 return nil
137 }
138
139
140
141
142 func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
143 if z.decompressors == nil {
144 z.decompressors = make(map[uint16]Decompressor)
145 }
146 z.decompressors[method] = dcomp
147 }
148
149 func (z *Reader) decompressor(method uint16) Decompressor {
150 dcomp := z.decompressors[method]
151 if dcomp == nil {
152 dcomp = decompressor(method)
153 }
154 return dcomp
155 }
156
157
158 func (rc *ReadCloser) Close() error {
159 return rc.f.Close()
160 }
161
162
163
164
165
166
167 func (f *File) DataOffset() (offset int64, err error) {
168 bodyOffset, err := f.findBodyOffset()
169 if err != nil {
170 return
171 }
172 return f.headerOffset + bodyOffset, nil
173 }
174
175
176
177 func (f *File) Open() (io.ReadCloser, error) {
178 bodyOffset, err := f.findBodyOffset()
179 if err != nil {
180 return nil, err
181 }
182 size := int64(f.CompressedSize64)
183 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
184 dcomp := f.zip.decompressor(f.Method)
185 if dcomp == nil {
186 return nil, ErrAlgorithm
187 }
188 var rc io.ReadCloser = dcomp(r)
189 rc = &checksumReader{
190 rc: rc,
191 hash: crc32.NewIEEE(),
192 f: f,
193 }
194 return rc, nil
195 }
196
197
198
199 func (f *File) OpenRaw() (io.Reader, error) {
200 bodyOffset, err := f.findBodyOffset()
201 if err != nil {
202 return nil, err
203 }
204 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
205 return r, nil
206 }
207
208 func (f *File) readDataDescriptor() {
209 if !f.hasDataDescriptor() {
210 return
211 }
212
213 bodyOffset, err := f.findBodyOffset()
214 if err != nil {
215 f.descErr = err
216 return
217 }
218
219
220
221
222
223
224
225
226
227
228
229
230 zip64 := f.zip64 || f.isZip64()
231 n := int64(dataDescriptorLen)
232 if zip64 {
233 n = dataDescriptor64Len
234 }
235 size := int64(f.CompressedSize64)
236 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, n)
237 dd, err := readDataDescriptor(r, zip64)
238 if err != nil {
239 f.descErr = err
240 return
241 }
242 f.CRC32 = dd.crc32
243 }
244
// checksumReader wraps the decompressed stream of a File, hashing the bytes
// as they are read so they can be checked against the entry's header.
type checksumReader struct {
	// rc is the decompressed data source.
	rc io.ReadCloser
	// hash accumulates the checksum of every byte returned so far.
	hash hash.Hash32
	// nread counts the bytes returned so far.
	nread uint64
	// f is the entry being read; it supplies the expected size and CRC-32.
	f *File
	// err is a sticky error returned by all later Read calls.
	err error
}
252
253 func (r *checksumReader) Stat() (fs.FileInfo, error) {
254 return headerFileInfo{&r.f.FileHeader}, nil
255 }
256
257 func (r *checksumReader) Read(b []byte) (n int, err error) {
258 if r.err != nil {
259 return 0, r.err
260 }
261 n, err = r.rc.Read(b)
262 r.hash.Write(b[:n])
263 r.nread += uint64(n)
264 if err == nil {
265 return
266 }
267 if err == io.EOF {
268 if r.nread != r.f.UncompressedSize64 {
269 return 0, io.ErrUnexpectedEOF
270 }
271 if r.f.hasDataDescriptor() {
272 if r.f.descErr != nil {
273 if r.f.descErr == io.EOF {
274 err = io.ErrUnexpectedEOF
275 } else {
276 err = r.f.descErr
277 }
278 } else if r.hash.Sum32() != r.f.CRC32 {
279 err = ErrChecksum
280 }
281 } else {
282
283
284
285 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
286 err = ErrChecksum
287 }
288 }
289 }
290 r.err = err
291 return
292 }
293
294 func (r *checksumReader) Close() error { return r.rc.Close() }
295
296
297
298 func (f *File) findBodyOffset() (int64, error) {
299 var buf [fileHeaderLen]byte
300 if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
301 return 0, err
302 }
303 b := readBuf(buf[:])
304 if sig := b.uint32(); sig != fileHeaderSignature {
305 return 0, ErrFormat
306 }
307 b = b[22:]
308 filenameLen := int(b.uint16())
309 extraLen := int(b.uint16())
310 return int64(fileHeaderLen + filenameLen + extraLen), nil
311 }
312
313
314
315
// readDirectoryHeader reads one central directory header from r and stores
// its contents in f. It returns ErrFormat if the record does not begin with
// directoryHeaderSignature, or if a zip64 value that the header calls for
// is truncated or (for compressed size and header offset) missing. Read
// errors from r are returned unchanged.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	// Fixed-size fields, in on-disk order.
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	// Start from the 32-bit sizes; a zip64 extra field may replace them below.
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	b = b[4:] // skip 4 bytes that this reader does not use
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	// The variable-length tail: file name, extra field, comment.
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Decide whether Name and Comment should be treated as UTF-8.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// At least one of the two strings is not valid UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Neither string is reported as requiring UTF-8 (presumably both
		// are plain ASCII-compatible text; see detectUTF8).
		f.NonUTF8 = false
	default:
		// Both strings are valid and at least one requires UTF-8: defer to
		// bit 0x800 of the header flags.
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	// A 32-bit field of all ones means the real value has to come from the
	// zip64 extra field.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Walk the extra field as a sequence of (16-bit tag, 16-bit size, data)
	// records. modified holds the best modification time found, if any;
	// later records overwrite earlier ones.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			// Record claims more data than remains; stop parsing extras.
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// The 64-bit values appear in this fixed order, and each one is
			// present only if the matching 32-bit header field was all
			// ones.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // leading 32-bit value is ignored
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // only attribute 1 with 24 bytes of data is used
				}

				// The first 8 bytes are taken as the modification time in
				// 100ns ticks counted from 1601-01-01 UTC.
				const ticksPerSecond = 1e7
				ts := int64(attrBuf.uint64())
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // first 32-bit value is ignored
			ts := int64(fieldBuf.uint32()) // second is the modification time in Unix seconds
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			// The flag byte's low bit must be set for a modification time
			// to follow.
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // modification time in Unix seconds
			modified = time.Unix(ts, 0)
		}
	}

	// Default to the MS-DOS date/time from the fixed header.
	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		// A timestamp from the extra field takes precedence.
		f.Modified = modified.UTC()

		// When the MS-DOS fields are set as well, present the timestamp in
		// a zone derived (via timeZone) from the difference between the
		// MS-DOS time and the extra-field time.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// A missing zip64 uncompressed size is deliberately tolerated here,
	// unlike a missing compressed size or header offset.
	_ = needUSize

	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
487
488 func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) {
489
490 var buf [dataDescriptor64Len]byte
491
492
493
494
495
496
497
498
499
500
501 if _, err := io.ReadFull(r, buf[:4]); err != nil {
502 return nil, err
503 }
504 off := 0
505 maybeSig := readBuf(buf[:4])
506 if maybeSig.uint32() != dataDescriptorSignature {
507
508
509 off += 4
510 }
511
512 end := dataDescriptorLen - 4
513 if zip64 {
514 end = dataDescriptor64Len - 4
515 }
516 if _, err := io.ReadFull(r, buf[off:end]); err != nil {
517 return nil, err
518 }
519 b := readBuf(buf[:end])
520
521 out := &dataDescriptor{
522 crc32: b.uint32(),
523 }
524
525 if zip64 {
526 out.compressedSize = b.uint64()
527 out.uncompressedSize = b.uint64()
528 } else {
529 out.compressedSize = uint64(b.uint32())
530 out.uncompressedSize = uint64(b.uint32())
531 }
532 return out, nil
533 }
534
535 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
536
537 var buf []byte
538 var directoryEndOffset int64
539 for i, bLen := range []int64{1024, 65 * 1024} {
540 if bLen > size {
541 bLen = size
542 }
543 buf = make([]byte, int(bLen))
544 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
545 return nil, err
546 }
547 if p := findSignatureInBlock(buf); p >= 0 {
548 buf = buf[p:]
549 directoryEndOffset = size - bLen + int64(p)
550 break
551 }
552 if i == 1 || bLen == size {
553 return nil, ErrFormat
554 }
555 }
556
557
558 b := readBuf(buf[4:])
559 d := &directoryEnd{
560 diskNbr: uint32(b.uint16()),
561 dirDiskNbr: uint32(b.uint16()),
562 dirRecordsThisDisk: uint64(b.uint16()),
563 directoryRecords: uint64(b.uint16()),
564 directorySize: uint64(b.uint32()),
565 directoryOffset: uint64(b.uint32()),
566 commentLen: b.uint16(),
567 }
568 l := int(d.commentLen)
569 if l > len(b) {
570 return nil, errors.New("zip: invalid comment length")
571 }
572 d.comment = string(b[:l])
573
574
575 if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
576 p, err := findDirectory64End(r, directoryEndOffset)
577 if err == nil && p >= 0 {
578 err = readDirectory64End(r, p, d)
579 }
580 if err != nil {
581 return nil, err
582 }
583 }
584
585 if o := int64(d.directoryOffset); o < 0 || o >= size {
586 return nil, ErrFormat
587 }
588 return d, nil
589 }
590
591
592
593
594 func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
595 locOffset := directoryEndOffset - directory64LocLen
596 if locOffset < 0 {
597 return -1, nil
598 }
599 buf := make([]byte, directory64LocLen)
600 if _, err := r.ReadAt(buf, locOffset); err != nil {
601 return -1, err
602 }
603 b := readBuf(buf)
604 if sig := b.uint32(); sig != directory64LocSignature {
605 return -1, nil
606 }
607 if b.uint32() != 0 {
608 return -1, nil
609 }
610 p := b.uint64()
611 if b.uint32() != 1 {
612 return -1, nil
613 }
614 return int64(p), nil
615 }
616
617
618
619 func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
620 buf := make([]byte, directory64EndLen)
621 if _, err := r.ReadAt(buf, offset); err != nil {
622 return err
623 }
624
625 b := readBuf(buf)
626 if sig := b.uint32(); sig != directory64EndSignature {
627 return ErrFormat
628 }
629
630 b = b[12:]
631 d.diskNbr = b.uint32()
632 d.dirDiskNbr = b.uint32()
633 d.dirRecordsThisDisk = b.uint64()
634 d.directoryRecords = b.uint64()
635 d.directorySize = b.uint64()
636 d.directoryOffset = b.uint64()
637
638 return nil
639 }
640
641 func findSignatureInBlock(b []byte) int {
642 for i := len(b) - directoryEndLen; i >= 0; i-- {
643
644 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
645
646 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
647 if n+directoryEndLen+i <= len(b) {
648 return i
649 }
650 }
651 }
652 return -1
653 }
654
// readBuf is a byte slice that is consumed from the front: each method
// decodes a value from the start of the slice and advances past it. The
// integer decoders are little-endian. All methods panic if the slice is
// too short.
type readBuf []byte

// uint8 consumes and returns the first byte.
func (b *readBuf) uint8() uint8 {
	cur := *b
	first := cur[0]
	*b = cur[1:]
	return first
}

// uint16 consumes two bytes and returns them as a little-endian value.
func (b *readBuf) uint16() uint16 {
	cur := *b
	val := binary.LittleEndian.Uint16(cur)
	*b = cur[2:]
	return val
}

// uint32 consumes four bytes and returns them as a little-endian value.
func (b *readBuf) uint32() uint32 {
	cur := *b
	val := binary.LittleEndian.Uint32(cur)
	*b = cur[4:]
	return val
}

// uint64 consumes eight bytes and returns them as a little-endian value.
func (b *readBuf) uint64() uint64 {
	cur := *b
	val := binary.LittleEndian.Uint64(cur)
	*b = cur[8:]
	return val
}

// sub consumes the first n bytes and returns them as their own readBuf,
// which shares storage with the original slice.
func (b *readBuf) sub(n int) readBuf {
	cur := *b
	head, tail := cur[:n], cur[n:]
	*b = tail
	return head
}
686
687
688
// A fileListEntry is one entry of the sorted list built by initFileList.
// It stands either for an entry stored in the archive or for a directory
// that is only implied by the path of another entry.
type fileListEntry struct {
	// name is the cleaned entry name produced by toValidName.
	name string
	// file is the archive entry, or nil for an implied directory.
	file *File
	// isDir reports whether the entry is a directory.
	isDir bool
}
694
// fileInfoDirEntry is satisfied by values that can serve both as an
// fs.FileInfo and as an fs.DirEntry; it is the result type of
// fileListEntry.stat.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
699
700 func (e *fileListEntry) stat() fileInfoDirEntry {
701 if !e.isDir {
702 return headerFileInfo{&e.file.FileHeader}
703 }
704 return e
705 }
706
707
708 func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem }
709 func (f *fileListEntry) Size() int64 { return 0 }
710 func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 }
711 func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
712 func (f *fileListEntry) IsDir() bool { return true }
713 func (f *fileListEntry) Sys() interface{} { return nil }
714
715 func (f *fileListEntry) ModTime() time.Time {
716 if f.file == nil {
717 return time.Time{}
718 }
719 return f.file.FileHeader.Modified.UTC()
720 }
721
722 func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil }
723
724
// toValidName coerces name into a path usable for lookups in the file
// list: backslashes become slashes, the path is cleaned, and a leading "/"
// and any leading "../" elements are removed.
//
// Names that reduce to "." or ".." (for example "", "./", ".." or "../..")
// cannot name an entry inside the archive, so "" is returned for them.
// initFileList skips entries whose cleaned name is empty, which keeps such
// names out of directory listings.
func toValidName(name string) string {
	name = strings.ReplaceAll(name, `\`, `/`)
	p := strings.TrimPrefix(path.Clean(name), "/")
	for strings.HasPrefix(p, "../") {
		p = p[len("../"):]
	}
	if p == "." || p == ".." {
		return ""
	}
	return p
}
736
737 func (r *Reader) initFileList() {
738 r.fileListOnce.Do(func() {
739 dirs := make(map[string]bool)
740 knownDirs := make(map[string]bool)
741 for _, file := range r.File {
742 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
743 name := toValidName(file.Name)
744 if name == "" {
745 continue
746 }
747 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
748 dirs[dir] = true
749 }
750 entry := fileListEntry{
751 name: name,
752 file: file,
753 isDir: isDir,
754 }
755 r.fileList = append(r.fileList, entry)
756 if isDir {
757 knownDirs[name] = true
758 }
759 }
760 for dir := range dirs {
761 if !knownDirs[dir] {
762 entry := fileListEntry{
763 name: dir,
764 file: nil,
765 isDir: true,
766 }
767 r.fileList = append(r.fileList, entry)
768 }
769 }
770
771 sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) })
772 })
773 }
774
775 func fileEntryLess(x, y string) bool {
776 xdir, xelem, _ := split(x)
777 ydir, yelem, _ := split(y)
778 return xdir < ydir || xdir == ydir && xelem < yelem
779 }
780
781
782
783
784
785 func (r *Reader) Open(name string) (fs.File, error) {
786 r.initFileList()
787
788 if !fs.ValidPath(name) {
789 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
790 }
791 e := r.openLookup(name)
792 if e == nil {
793 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
794 }
795 if e.isDir {
796 return &openDir{e, r.openReadDir(name), 0}, nil
797 }
798 rc, err := e.file.Open()
799 if err != nil {
800 return nil, err
801 }
802 return rc.(fs.File), nil
803 }
804
// split breaks name into its parent directory and final element. A single
// trailing slash is removed first and reported through isDir. When name
// contains no other slash the directory is ".".
func split(name string) (dir, elem string, isDir bool) {
	if n := len(name); n > 0 && name[n-1] == '/' {
		isDir = true
		name = name[:n-1]
	}
	for i := len(name) - 1; i >= 0; i-- {
		if name[i] == '/' {
			return name[:i], name[i+1:], isDir
		}
	}
	return ".", name, isDir
}
819
// dotFile is the synthetic directory entry that openLookup returns for ".".
var dotFile = &fileListEntry{name: "./", isDir: true}
821
822 func (r *Reader) openLookup(name string) *fileListEntry {
823 if name == "." {
824 return dotFile
825 }
826
827 dir, elem, _ := split(name)
828 files := r.fileList
829 i := sort.Search(len(files), func(i int) bool {
830 idir, ielem, _ := split(files[i].name)
831 return idir > dir || idir == dir && ielem >= elem
832 })
833 if i < len(files) {
834 fname := files[i].name
835 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
836 return &files[i]
837 }
838 }
839 return nil
840 }
841
842 func (r *Reader) openReadDir(dir string) []fileListEntry {
843 files := r.fileList
844 i := sort.Search(len(files), func(i int) bool {
845 idir, _, _ := split(files[i].name)
846 return idir >= dir
847 })
848 j := sort.Search(len(files), func(j int) bool {
849 jdir, _, _ := split(files[j].name)
850 return jdir > dir
851 })
852 return files[i:j]
853 }
854
// openDir is the value returned by Reader.Open for a directory.
type openDir struct {
	// e is the directory's own entry.
	e *fileListEntry
	// files are the directory's children, as returned by openReadDir.
	files []fileListEntry
	// offset is the number of children already handed out by ReadDir.
	offset int
}
860
861 func (d *openDir) Close() error { return nil }
862 func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil }
863
864 func (d *openDir) Read([]byte) (int, error) {
865 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
866 }
867
868 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
869 n := len(d.files) - d.offset
870 if count > 0 && n > count {
871 n = count
872 }
873 if n == 0 {
874 if count <= 0 {
875 return nil, nil
876 }
877 return nil, io.EOF
878 }
879 list := make([]fs.DirEntry, n)
880 for i := range list {
881 list[i] = d.files[d.offset+i].stat()
882 }
883 d.offset += n
884 return list, nil
885 }
886
View as plain text