...

Source file src/archive/zip/reader.go

Documentation: archive/zip

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package zip
		 6  
		 7  import (
		 8  	"bufio"
		 9  	"encoding/binary"
		10  	"errors"
		11  	"hash"
		12  	"hash/crc32"
		13  	"io"
		14  	"io/fs"
		15  	"os"
		16  	"path"
		17  	"sort"
		18  	"strings"
		19  	"sync"
		20  	"time"
		21  )
		22  
var (
	// ErrFormat is returned when the data is not a valid ZIP archive.
	ErrFormat = errors.New("zip: not a valid zip file")
	// ErrAlgorithm is returned when a file uses an unsupported
	// compression method (no Decompressor is registered for it).
	ErrAlgorithm = errors.New("zip: unsupported compression algorithm")
	// ErrChecksum is returned when a file's computed CRC-32 does not
	// match the value recorded in its header or data descriptor.
	ErrChecksum = errors.New("zip: checksum error")
)
		28  
// A Reader serves content from a ZIP archive.
type Reader struct {
	r             io.ReaderAt             // underlying archive bytes
	File          []*File                 // entries, in central-directory order
	Comment       string                  // archive comment from the end-of-central-directory record
	decompressors map[uint16]Decompressor // per-reader overrides; see RegisterDecompressor

	// fileList is a list of files sorted by ename,
	// for use by the Open method.
	fileListOnce sync.Once
	fileList     []fileListEntry
}
		41  
// A ReadCloser is a Reader that must be closed when no longer needed.
type ReadCloser struct {
	f *os.File // file backing the embedded Reader; released by Close
	Reader
}
		47  
// A File is a single file in a ZIP archive.
// The file information is in the embedded FileHeader.
// The file content can be accessed by calling Open.
type File struct {
	FileHeader
	zip          *Reader     // archive this file belongs to
	zipr         io.ReaderAt // reader over the raw archive bytes
	headerOffset int64       // offset of the local file header within zipr
	zip64        bool        // zip64 extended information extra field presence
	descErr      error       // error reading the data descriptor during init
}
		59  
		60  // OpenReader will open the Zip file specified by name and return a ReadCloser.
		61  func OpenReader(name string) (*ReadCloser, error) {
		62  	f, err := os.Open(name)
		63  	if err != nil {
		64  		return nil, err
		65  	}
		66  	fi, err := f.Stat()
		67  	if err != nil {
		68  		f.Close()
		69  		return nil, err
		70  	}
		71  	r := new(ReadCloser)
		72  	if err := r.init(f, fi.Size()); err != nil {
		73  		f.Close()
		74  		return nil, err
		75  	}
		76  	r.f = f
		77  	return r, nil
		78  }
		79  
		80  // NewReader returns a new Reader reading from r, which is assumed to
		81  // have the given size in bytes.
		82  func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
		83  	if size < 0 {
		84  		return nil, errors.New("zip: size cannot be negative")
		85  	}
		86  	zr := new(Reader)
		87  	if err := zr.init(r, size); err != nil {
		88  		return nil, err
		89  	}
		90  	return zr, nil
		91  }
		92  
// init populates z from the archive of the given size read via r: it
// locates the end-of-central-directory record, then walks the central
// directory building z.File.
func (z *Reader) init(r io.ReaderAt, size int64) error {
	end, err := readDirectoryEnd(r, size)
	if err != nil {
		return err
	}
	z.r = r
	// Since the number of directory records is not validated, it is not
	// safe to preallocate z.File without first checking that the specified
	// number of files is reasonable, since a malformed archive may
	// indicate it contains up to 1 << 128 - 1 files. Since each file has a
	// header which will be _at least_ 30 bytes we can safely preallocate
	// if (data size / 30) >= end.directoryRecords.
	if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
		z.File = make([]*File, 0, end.directoryRecords)
	}
	z.Comment = end.comment
	rs := io.NewSectionReader(r, 0, size)
	if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
		return err
	}
	buf := bufio.NewReader(rs)

	// The count of files inside a zip is truncated to fit in a uint16.
	// Gloss over this by reading headers until we encounter
	// a bad one, and then only report an ErrFormat or UnexpectedEOF if
	// the file count modulo 65536 is incorrect.
	for {
		f := &File{zip: z, zipr: r}
		err = readDirectoryHeader(f, buf)
		if err == ErrFormat || err == io.ErrUnexpectedEOF {
			break
		}
		if err != nil {
			return err
		}
		// Read the trailing data descriptor (if any) now; any failure is
		// recorded in f.descErr rather than returned here.
		f.readDataDescriptor()
		z.File = append(z.File, f)
	}
	if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
		// Return the readDirectoryHeader error if we read
		// the wrong number of directory entries.
		return err
	}
	return nil
}
	 138  
	 139  // RegisterDecompressor registers or overrides a custom decompressor for a
	 140  // specific method ID. If a decompressor for a given method is not found,
	 141  // Reader will default to looking up the decompressor at the package level.
	 142  func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
	 143  	if z.decompressors == nil {
	 144  		z.decompressors = make(map[uint16]Decompressor)
	 145  	}
	 146  	z.decompressors[method] = dcomp
	 147  }
	 148  
	 149  func (z *Reader) decompressor(method uint16) Decompressor {
	 150  	dcomp := z.decompressors[method]
	 151  	if dcomp == nil {
	 152  		dcomp = decompressor(method)
	 153  	}
	 154  	return dcomp
	 155  }
	 156  
// Close closes the Zip file, rendering it unusable for I/O.
// It closes the underlying *os.File opened by OpenReader.
func (rc *ReadCloser) Close() error {
	return rc.f.Close()
}
	 161  
	 162  // DataOffset returns the offset of the file's possibly-compressed
	 163  // data, relative to the beginning of the zip file.
	 164  //
	 165  // Most callers should instead use Open, which transparently
	 166  // decompresses data and verifies checksums.
	 167  func (f *File) DataOffset() (offset int64, err error) {
	 168  	bodyOffset, err := f.findBodyOffset()
	 169  	if err != nil {
	 170  		return
	 171  	}
	 172  	return f.headerOffset + bodyOffset, nil
	 173  }
	 174  
	 175  // Open returns a ReadCloser that provides access to the File's contents.
	 176  // Multiple files may be read concurrently.
	 177  func (f *File) Open() (io.ReadCloser, error) {
	 178  	bodyOffset, err := f.findBodyOffset()
	 179  	if err != nil {
	 180  		return nil, err
	 181  	}
	 182  	size := int64(f.CompressedSize64)
	 183  	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
	 184  	dcomp := f.zip.decompressor(f.Method)
	 185  	if dcomp == nil {
	 186  		return nil, ErrAlgorithm
	 187  	}
	 188  	var rc io.ReadCloser = dcomp(r)
	 189  	rc = &checksumReader{
	 190  		rc:	 rc,
	 191  		hash: crc32.NewIEEE(),
	 192  		f:		f,
	 193  	}
	 194  	return rc, nil
	 195  }
	 196  
	 197  // OpenRaw returns a Reader that provides access to the File's contents without
	 198  // decompression.
	 199  func (f *File) OpenRaw() (io.Reader, error) {
	 200  	bodyOffset, err := f.findBodyOffset()
	 201  	if err != nil {
	 202  		return nil, err
	 203  	}
	 204  	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
	 205  	return r, nil
	 206  }
	 207  
// readDataDescriptor reads the data descriptor that follows the file's
// payload (when the header flags say one exists) and copies its CRC-32
// into f.CRC32. Any failure is recorded in f.descErr for later
// reporting by checksumReader.Read; this function never returns an error.
func (f *File) readDataDescriptor() {
	if !f.hasDataDescriptor() {
		return
	}

	bodyOffset, err := f.findBodyOffset()
	if err != nil {
		f.descErr = err
		return
	}

	// In section 4.3.9.2 of the spec: "However ZIP64 format MAY be used
	// regardless of the size of a file.  When extracting, if the zip64
	// extended information extra field is present for the file the
	// compressed and uncompressed sizes will be 8 byte values."
	//
	// Historically, this package has used the compressed and uncompressed
	// sizes from the central directory to determine if the package is
	// zip64.
	//
	// For this case we allow either the extra field or sizes to determine
	// the data descriptor length.
	zip64 := f.zip64 || f.isZip64()
	n := int64(dataDescriptorLen)
	if zip64 {
		n = dataDescriptor64Len
	}
	// The descriptor sits immediately after the compressed payload.
	size := int64(f.CompressedSize64)
	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, n)
	dd, err := readDataDescriptor(r, zip64)
	if err != nil {
		f.descErr = err
		return
	}
	f.CRC32 = dd.crc32
}
	 244  
// checksumReader wraps the decompressed stream of a File, computing the
// CRC-32 of everything read and validating it against the file's
// recorded checksum when EOF is reached.
type checksumReader struct {
	rc    io.ReadCloser // decompressed stream
	hash  hash.Hash32   // running CRC-32 (IEEE)
	nread uint64        // number of bytes read so far
	f     *File
	err   error // sticky error
}
	 252  
// Stat returns the underlying file's header as an fs.FileInfo, letting
// the reader returned by File.Open satisfy fs.File.
func (r *checksumReader) Stat() (fs.FileInfo, error) {
	return headerFileInfo{&r.f.FileHeader}, nil
}
	 256  
// Read reads from the decompressed stream, folding the bytes into the
// running CRC-32. At EOF it validates the byte count against the
// directory's uncompressed size and the checksum against the header or
// data descriptor, returning io.ErrUnexpectedEOF or ErrChecksum on
// mismatch. Any returned error is sticky.
func (r *checksumReader) Read(b []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	n, err = r.rc.Read(b)
	r.hash.Write(b[:n])
	r.nread += uint64(n)
	if err == nil {
		return
	}
	if err == io.EOF {
		if r.nread != r.f.UncompressedSize64 {
			// Stream ended before reaching the recorded size.
			return 0, io.ErrUnexpectedEOF
		}
		if r.f.hasDataDescriptor() {
			if r.f.descErr != nil {
				// Surface the descriptor-read failure recorded at init.
				if r.f.descErr == io.EOF {
					err = io.ErrUnexpectedEOF
				} else {
					err = r.f.descErr
				}
			} else if r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		} else {
			// If there's not a data descriptor, we still compare
			// the CRC32 of what we've read against the file header
			// or TOC's CRC32, if it seems like it was set.
			if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
				err = ErrChecksum
			}
		}
	}
	r.err = err
	return
}
	 293  
// Close closes the wrapped decompressor stream.
func (r *checksumReader) Close() error { return r.rc.Close() }
	 295  
// findBodyOffset does the minimum work to verify the file has a header
// and returns the file body offset.
// The offset is relative to f.headerOffset, not the start of the archive.
func (f *File) findBodyOffset() (int64, error) {
	var buf [fileHeaderLen]byte
	if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
		return 0, err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != fileHeaderSignature {
		return 0, ErrFormat
	}
	b = b[22:] // skip over most of the header
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	// The body starts right after the fixed header, name, and extra field.
	return int64(fileHeaderLen + filenameLen + extraLen), nil
}
	 312  
// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and ErrFormat if it doesn't find a valid header signature.
// On success it fills in f's FileHeader fields, headerOffset, and zip64
// flag from the central-directory record and its extra fields.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	b = b[4:] // skipped start disk number and internal attributes (2x uint16)
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	// The variable-length tail holds name, extra field, and comment,
	// back to back.
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Determine the character encoding.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// Name and Comment definitely not UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Name and Comment use only single-byte runes that overlap with UTF-8.
		f.NonUTF8 = false
	default:
		// Might be UTF-8, might be some other encoding; preserve existing flag.
		// Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag.
		// Since it is impossible to always distinguish valid UTF-8 from some
		// other encoding (e.g., GBK or Shift-JIS), we trust the flag.
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	// Sentinel 0xFFFFFFFF values mean the real value lives in the zip64
	// extra field.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Best effort to find what we need.
	// Other zip authors might not even follow the basic format,
	// and we'll just ignore the Extra content in that case.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// update directory values from the zip64 extra block.
			// They should only be consulted if the sizes read earlier
			// are maxed out.
			// See golang.org/issue/13367.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // reserved (ignored)
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // Ignore irrelevant attributes
				}

				const ticksPerSecond = 1e7    // Windows timestamp resolution
				ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
				secs := int64(ts / ticksPerSecond)
				nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // AcTime (ignored)
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// If legacy MS-DOS timestamps are set, we can use the delta between
		// the legacy and extended versions to estimate timezone offset.
		//
		// A non-UTC timezone is always used (even if offset is zero).
		// Thus, FileHeader.Modified.Location() == time.UTC is useful for
		// determining whether extended timestamps are present.
		// This is necessary for users that need to do additional time
		// calculations when dealing with legacy ZIP formats.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// Assume that uncompressed size 2³²-1 could plausibly happen in
	// an old zip32 file that was sharding inputs into the largest chunks
	// possible (or is just malicious; search the web for 42.zip).
	// If needUSize is true still, it means we didn't see a zip64 extension.
	// As long as the compressed size is not also 2³²-1 (implausible)
	// and the header is not also 2³²-1 (equally implausible),
	// accept the uncompressed size 2³²-1 as valid.
	// If nothing else, this keeps archive/zip working with 42.zip.
	_ = needUSize

	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
	 487  
// readDataDescriptor reads a data descriptor record from r, handling
// both the zip32 and zip64 layouts and the optional leading signature.
func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) {
	// Create enough space for the largest possible size
	var buf [dataDescriptor64Len]byte

	// The spec says: "Although not originally assigned a
	// signature, the value 0x08074b50 has commonly been adopted
	// as a signature value for the data descriptor record.
	// Implementers should be aware that ZIP files may be
	// encountered with or without this signature marking data
	// descriptors and should account for either case when reading
	// ZIP files to ensure compatibility."
	//
	// First read just those 4 bytes to see if the signature exists.
	if _, err := io.ReadFull(r, buf[:4]); err != nil {
		return nil, err
	}
	off := 0
	maybeSig := readBuf(buf[:4])
	if maybeSig.uint32() != dataDescriptorSignature {
		// No data descriptor signature. Keep these four
		// bytes.
		off += 4
	}

	// Read the remainder of the fixed-size record; end excludes the
	// 4 signature bytes already consumed (or reinterpreted as data).
	end := dataDescriptorLen - 4
	if zip64 {
		end = dataDescriptor64Len - 4
	}
	if _, err := io.ReadFull(r, buf[off:end]); err != nil {
		return nil, err
	}
	b := readBuf(buf[:end])

	out := &dataDescriptor{
		crc32: b.uint32(),
	}

	// Sizes are 8 bytes each in zip64 descriptors, 4 bytes otherwise.
	if zip64 {
		out.compressedSize = b.uint64()
		out.uncompressedSize = b.uint64()
	} else {
		out.compressedSize = uint64(b.uint32())
		out.uncompressedSize = uint64(b.uint32())
	}
	return out, nil
}
	 534  
// readDirectoryEnd locates and parses the end-of-central-directory
// record near the end of the archive, following the zip64 locator if
// the zip32 fields are saturated.
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
	// look for directoryEndSignature in the last 1k, then in the last 65k
	var buf []byte
	var directoryEndOffset int64
	for i, bLen := range []int64{1024, 65 * 1024} {
		if bLen > size {
			bLen = size
		}
		buf = make([]byte, int(bLen))
		if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
			return nil, err
		}
		if p := findSignatureInBlock(buf); p >= 0 {
			buf = buf[p:]
			directoryEndOffset = size - bLen + int64(p)
			break
		}
		if i == 1 || bLen == size {
			return nil, ErrFormat
		}
	}

	// read header into struct
	b := readBuf(buf[4:]) // skip signature
	d := &directoryEnd{
		diskNbr:            uint32(b.uint16()),
		dirDiskNbr:         uint32(b.uint16()),
		dirRecordsThisDisk: uint64(b.uint16()),
		directoryRecords:   uint64(b.uint16()),
		directorySize:      uint64(b.uint32()),
		directoryOffset:    uint64(b.uint32()),
		commentLen:         b.uint16(),
	}
	l := int(d.commentLen)
	if l > len(b) {
		return nil, errors.New("zip: invalid comment length")
	}
	d.comment = string(b[:l])

	// These values mean that the file can be a zip64 file
	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
		p, err := findDirectory64End(r, directoryEndOffset)
		if err == nil && p >= 0 {
			// A zip64 end record exists; its values override the
			// saturated zip32 fields.
			err = readDirectory64End(r, p, d)
		}
		if err != nil {
			return nil, err
		}
	}
	// Make sure directoryOffset points to somewhere in our file.
	if o := int64(d.directoryOffset); o < 0 || o >= size {
		return nil, ErrFormat
	}
	return d, nil
}
	 590  
// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found. A negative offset with a nil error means no (valid) locator
// is present.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	locOffset := directoryEndOffset - directory64LocLen
	if locOffset < 0 {
		return -1, nil // no need to look for a header outside the file
	}
	buf := make([]byte, directory64LocLen)
	if _, err := r.ReadAt(buf, locOffset); err != nil {
		return -1, err
	}
	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64LocSignature {
		return -1, nil
	}
	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
		return -1, nil // the file is not a valid zip64-file
	}
	p := b.uint64()      // relative offset of the zip64 end of central directory record
	if b.uint32() != 1 { // total number of disks
		return -1, nil // the file is not a valid zip64-file
	}
	return int64(p), nil
}
	 616  
// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	buf := make([]byte, directory64EndLen)
	if _, err := r.ReadAt(buf, offset); err != nil {
		return err
	}

	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64EndSignature {
		return ErrFormat
	}

	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
	d.diskNbr = b.uint32()            // number of this disk
	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
	d.directoryRecords = b.uint64()   // total number of entries in the central directory
	d.directorySize = b.uint64()      // size of the central directory
	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number

	return nil
}
	 640  
	 641  func findSignatureInBlock(b []byte) int {
	 642  	for i := len(b) - directoryEndLen; i >= 0; i-- {
	 643  		// defined from directoryEndSignature in struct.go
	 644  		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
	 645  			// n is length of comment
	 646  			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
	 647  			if n+directoryEndLen+i <= len(b) {
	 648  				return i
	 649  			}
	 650  		}
	 651  	}
	 652  	return -1
	 653  }
	 654  
	 655  type readBuf []byte
	 656  
	 657  func (b *readBuf) uint8() uint8 {
	 658  	v := (*b)[0]
	 659  	*b = (*b)[1:]
	 660  	return v
	 661  }
	 662  
	 663  func (b *readBuf) uint16() uint16 {
	 664  	v := binary.LittleEndian.Uint16(*b)
	 665  	*b = (*b)[2:]
	 666  	return v
	 667  }
	 668  
	 669  func (b *readBuf) uint32() uint32 {
	 670  	v := binary.LittleEndian.Uint32(*b)
	 671  	*b = (*b)[4:]
	 672  	return v
	 673  }
	 674  
	 675  func (b *readBuf) uint64() uint64 {
	 676  	v := binary.LittleEndian.Uint64(*b)
	 677  	*b = (*b)[8:]
	 678  	return v
	 679  }
	 680  
	 681  func (b *readBuf) sub(n int) readBuf {
	 682  	b2 := (*b)[:n]
	 683  	*b = (*b)[n:]
	 684  	return b2
	 685  }
	 686  
// A fileListEntry is a File and its ename.
// If file == nil, the fileListEntry describes a directory without metadata.
type fileListEntry struct {
	name  string // cleaned, slash-separated path; see toValidName
	file  *File  // nil for synthesized parent directories
	isDir bool
}
	 694  
// fileInfoDirEntry is implemented by values usable as both an
// fs.FileInfo and an fs.DirEntry; see fileListEntry.stat.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
	 699  
// stat returns the entry's metadata: the real file header when one
// exists, or the entry itself for directories (which implement the
// directory-only methods below).
func (e *fileListEntry) stat() fileInfoDirEntry {
	if !e.isDir {
		return headerFileInfo{&e.file.FileHeader}
	}
	return e
}
	 706  
// Only used for directories.
// These fixed values make every directory look like a read-only dir
// entry (mode dr-xr-xr-x, size 0).
func (f *fileListEntry) Name() string      { _, elem, _ := split(f.name); return elem }
func (f *fileListEntry) Size() int64       { return 0 }
func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 }
func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
func (f *fileListEntry) IsDir() bool       { return true }
func (f *fileListEntry) Sys() interface{}  { return nil }
	 714  
// ModTime returns the archived modification time of the directory's
// entry, or the zero time for synthesized directories (file == nil).
func (f *fileListEntry) ModTime() time.Time {
	if f.file == nil {
		return time.Time{}
	}
	return f.file.FileHeader.Modified.UTC()
}
	 721  
// Info implements fs.DirEntry; a fileListEntry is its own fs.FileInfo.
func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil }
	 723  
	 724  // toValidName coerces name to be a valid name for fs.FS.Open.
	 725  func toValidName(name string) string {
	 726  	name = strings.ReplaceAll(name, `\`, `/`)
	 727  	p := path.Clean(name)
	 728  	if strings.HasPrefix(p, "/") {
	 729  		p = p[len("/"):]
	 730  	}
	 731  	for strings.HasPrefix(p, "../") {
	 732  		p = p[len("../"):]
	 733  	}
	 734  	return p
	 735  }
	 736  
// initFileList builds, exactly once, the sorted entry list used by the
// fs.FS methods (Open/openLookup/openReadDir). It also synthesizes
// entries for parent directories that have no explicit record in the
// archive.
func (r *Reader) initFileList() {
	r.fileListOnce.Do(func() {
		dirs := make(map[string]bool)      // all parent directories seen
		knownDirs := make(map[string]bool) // directories with explicit entries
		for _, file := range r.File {
			isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
			name := toValidName(file.Name)
			if name == "" {
				continue
			}
			// Record every ancestor directory of this entry.
			for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
				dirs[dir] = true
			}
			entry := fileListEntry{
				name:  name,
				file:  file,
				isDir: isDir,
			}
			r.fileList = append(r.fileList, entry)
			if isDir {
				knownDirs[name] = true
			}
		}
		// Add implied directories that had no archive record (file == nil).
		for dir := range dirs {
			if !knownDirs[dir] {
				entry := fileListEntry{
					name:  dir,
					file:  nil,
					isDir: true,
				}
				r.fileList = append(r.fileList, entry)
			}
		}

		sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) })
	})
}
	 774  
	 775  func fileEntryLess(x, y string) bool {
	 776  	xdir, xelem, _ := split(x)
	 777  	ydir, yelem, _ := split(y)
	 778  	return xdir < ydir || xdir == ydir && xelem < yelem
	 779  }
	 780  
// Open opens the named file in the ZIP archive,
// using the semantics of fs.FS.Open:
// paths are always slash separated, with no
// leading / or ../ elements.
func (r *Reader) Open(name string) (fs.File, error) {
	r.initFileList()

	if !fs.ValidPath(name) {
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
	}
	e := r.openLookup(name)
	if e == nil {
		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
	}
	// Directories get a ReadDir-capable handle; files get the
	// decompressing reader from File.Open, which implements fs.File.
	if e.isDir {
		return &openDir{e, r.openReadDir(name), 0}, nil
	}
	rc, err := e.file.Open()
	if err != nil {
		return nil, err
	}
	return rc.(fs.File), nil
}
	 804  
	 805  func split(name string) (dir, elem string, isDir bool) {
	 806  	if len(name) > 0 && name[len(name)-1] == '/' {
	 807  		isDir = true
	 808  		name = name[:len(name)-1]
	 809  	}
	 810  	i := len(name) - 1
	 811  	for i >= 0 && name[i] != '/' {
	 812  		i--
	 813  	}
	 814  	if i < 0 {
	 815  		return ".", name, isDir
	 816  	}
	 817  	return name[:i], name[i+1:], isDir
	 818  }
	 819  
// dotFile is the synthetic root-directory entry returned by openLookup
// for the name ".".
var dotFile = &fileListEntry{name: "./", isDir: true}
	 821  
// openLookup returns the fileListEntry for name, or nil if there is
// none. fileList is sorted by (dir, elem) — see fileEntryLess — so a
// binary search lands on the file itself or on its directory entry
// (same name plus a trailing slash).
func (r *Reader) openLookup(name string) *fileListEntry {
	if name == "." {
		return dotFile
	}

	dir, elem, _ := split(name)
	files := r.fileList
	i := sort.Search(len(files), func(i int) bool {
		idir, ielem, _ := split(files[i].name)
		return idir > dir || idir == dir && ielem >= elem
	})
	if i < len(files) {
		fname := files[i].name
		// Match either the exact name or the directory form "name/".
		if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
			return &files[i]
		}
	}
	return nil
}
	 841  
// openReadDir returns the entries whose immediate parent is dir.
// Because fileList is sorted by (dir, elem), the children form one
// contiguous run, located with two binary searches.
func (r *Reader) openReadDir(dir string) []fileListEntry {
	files := r.fileList
	i := sort.Search(len(files), func(i int) bool {
		idir, _, _ := split(files[i].name)
		return idir >= dir
	})
	j := sort.Search(len(files), func(j int) bool {
		jdir, _, _ := split(files[j].name)
		return jdir > dir
	})
	return files[i:j]
}
	 854  
// An openDir is the fs.File returned by Reader.Open for a directory;
// it supports ReadDir but not Read.
type openDir struct {
	e      *fileListEntry  // the directory's own entry
	files  []fileListEntry // children, sorted; see openReadDir
	offset int             // how many children ReadDir has returned
}
	 860  
// Close is a no-op: an open directory holds no resources.
func (d *openDir) Close() error { return nil }

// Stat returns the directory's own file info.
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil }
	 863  
// Read always fails: a directory cannot be read as a byte stream.
func (d *openDir) Read([]byte) (int, error) {
	return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
}
	 867  
	 868  func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
	 869  	n := len(d.files) - d.offset
	 870  	if count > 0 && n > count {
	 871  		n = count
	 872  	}
	 873  	if n == 0 {
	 874  		if count <= 0 {
	 875  			return nil, nil
	 876  		}
	 877  		return nil, io.EOF
	 878  	}
	 879  	list := make([]fs.DirEntry, n)
	 880  	for i := range list {
	 881  		list[i] = d.files[d.offset+i].stat()
	 882  	}
	 883  	d.offset += n
	 884  	return list, nil
	 885  }
	 886  

View as plain text