...

Source file src/archive/tar/format.go

Documentation: archive/tar

		 1  // Copyright 2016 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package tar
		 6  
		 7  import "strings"
		 8  
// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with their own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the Header fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
//
// The Writer currently provides no support for sparse files.
type Format int
		47  
// Constants to identify various tar formats.
//
// FormatUnknown is zero and every other constant is a distinct power of two,
// so a single Format value can also hold a set of candidate formats.
const (
	// Deliberately hide the meaning of constants from public API.
	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	FormatUnknown

	// formatV7 is the format of the original Unix V7 tar tool prior to
	// standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// formatSTAR is Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is the first bit past the last defined format. It is the
	// exclusive upper bound when iterating over the individual format bits.
	formatMax
)
	 107  
	 108  func (f Format) has(f2 Format) bool	 { return f&f2 != 0 }
	 109  func (f *Format) mayBe(f2 Format)		 { *f |= f2 }
	 110  func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
	 111  func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
	 112  
	 113  var formatNames = map[Format]string{
	 114  	formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
	 115  }
	 116  
	 117  func (f Format) String() string {
	 118  	var ss []string
	 119  	for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
	 120  		if f.has(f2) {
	 121  			ss = append(ss, formatNames[f2])
	 122  		}
	 123  	}
	 124  	switch len(ss) {
	 125  	case 0:
	 126  		return "<unknown>"
	 127  	case 1:
	 128  		return ss[0]
	 129  	default:
	 130  		return "(" + strings.Join(ss, " | ") + ")"
	 131  	}
	 132  }
	 133  
	 134  // Magics used to identify various formats.
	 135  const (
	 136  	magicGNU, versionGNU		 = "ustar ", " \x00"
	 137  	magicUSTAR, versionUSTAR = "ustar\x00", "00"
	 138  	trailerSTAR							= "tar\x00"
	 139  )
	 140  
// Size constants from various tar specifications.
const (
	// blockSize is the size in bytes of each block in a tar stream.
	blockSize = 512
	// nameSize is the max length in bytes of the name field in USTAR format.
	nameSize = 100
	// prefixSize is the max length in bytes of the prefix field in USTAR format.
	prefixSize = 155
)
	 147  
	 148  // blockPadding computes the number of bytes needed to pad offset up to the
	 149  // nearest block edge where 0 <= n < blockSize.
	 150  func blockPadding(offset int64) (n int64) {
	 151  	return -offset & (blockSize - 1)
	 152  }
	 153  
	 154  var zeroBlock block
	 155  
	 156  type block [blockSize]byte
	 157  
	 158  // Convert block to any number of formats.
	 159  func (b *block) V7() *headerV7			 { return (*headerV7)(b) }
	 160  func (b *block) GNU() *headerGNU		 { return (*headerGNU)(b) }
	 161  func (b *block) STAR() *headerSTAR	 { return (*headerSTAR)(b) }
	 162  func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
	 163  func (b *block) Sparse() sparseArray { return sparseArray(b[:]) }
	 164  
	 165  // GetFormat checks that the block is a valid tar header based on the checksum.
	 166  // It then attempts to guess the specific format based on magic values.
	 167  // If the checksum fails, then FormatUnknown is returned.
	 168  func (b *block) GetFormat() Format {
	 169  	// Verify checksum.
	 170  	var p parser
	 171  	value := p.parseOctal(b.V7().Chksum())
	 172  	chksum1, chksum2 := b.ComputeChecksum()
	 173  	if p.err != nil || (value != chksum1 && value != chksum2) {
	 174  		return FormatUnknown
	 175  	}
	 176  
	 177  	// Guess the magic values.
	 178  	magic := string(b.USTAR().Magic())
	 179  	version := string(b.USTAR().Version())
	 180  	trailer := string(b.STAR().Trailer())
	 181  	switch {
	 182  	case magic == magicUSTAR && trailer == trailerSTAR:
	 183  		return formatSTAR
	 184  	case magic == magicUSTAR:
	 185  		return FormatUSTAR | FormatPAX
	 186  	case magic == magicGNU && version == versionGNU:
	 187  		return FormatGNU
	 188  	default:
	 189  		return formatV7
	 190  	}
	 191  }
	 192  
	 193  // SetFormat writes the magic values necessary for specified format
	 194  // and then updates the checksum accordingly.
	 195  func (b *block) SetFormat(format Format) {
	 196  	// Set the magic values.
	 197  	switch {
	 198  	case format.has(formatV7):
	 199  		// Do nothing.
	 200  	case format.has(FormatGNU):
	 201  		copy(b.GNU().Magic(), magicGNU)
	 202  		copy(b.GNU().Version(), versionGNU)
	 203  	case format.has(formatSTAR):
	 204  		copy(b.STAR().Magic(), magicUSTAR)
	 205  		copy(b.STAR().Version(), versionUSTAR)
	 206  		copy(b.STAR().Trailer(), trailerSTAR)
	 207  	case format.has(FormatUSTAR | FormatPAX):
	 208  		copy(b.USTAR().Magic(), magicUSTAR)
	 209  		copy(b.USTAR().Version(), versionUSTAR)
	 210  	default:
	 211  		panic("invalid format")
	 212  	}
	 213  
	 214  	// Update checksum.
	 215  	// This field is special in that it is terminated by a NULL then space.
	 216  	var f formatter
	 217  	field := b.V7().Chksum()
	 218  	chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
	 219  	f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
	 220  	field[7] = ' '
	 221  }
	 222  
	 223  // ComputeChecksum computes the checksum for the header block.
	 224  // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
	 225  // signed byte values.
	 226  // We compute and return both.
	 227  func (b *block) ComputeChecksum() (unsigned, signed int64) {
	 228  	for i, c := range b {
	 229  		if 148 <= i && i < 156 {
	 230  			c = ' ' // Treat the checksum field itself as all spaces.
	 231  		}
	 232  		unsigned += int64(c)
	 233  		signed += int64(int8(c))
	 234  	}
	 235  	return unsigned, signed
	 236  }
	 237  
	 238  // Reset clears the block with all zeros.
	 239  func (b *block) Reset() {
	 240  	*b = block{}
	 241  }
	 242  
	 243  type headerV7 [blockSize]byte
	 244  
	 245  func (h *headerV7) Name() []byte		 { return h[000:][:100] }
	 246  func (h *headerV7) Mode() []byte		 { return h[100:][:8] }
	 247  func (h *headerV7) UID() []byte			{ return h[108:][:8] }
	 248  func (h *headerV7) GID() []byte			{ return h[116:][:8] }
	 249  func (h *headerV7) Size() []byte		 { return h[124:][:12] }
	 250  func (h *headerV7) ModTime() []byte	{ return h[136:][:12] }
	 251  func (h *headerV7) Chksum() []byte	 { return h[148:][:8] }
	 252  func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
	 253  func (h *headerV7) LinkName() []byte { return h[157:][:100] }
	 254  
	 255  type headerGNU [blockSize]byte
	 256  
	 257  func (h *headerGNU) V7() *headerV7			 { return (*headerV7)(h) }
	 258  func (h *headerGNU) Magic() []byte			 { return h[257:][:6] }
	 259  func (h *headerGNU) Version() []byte		 { return h[263:][:2] }
	 260  func (h *headerGNU) UserName() []byte		{ return h[265:][:32] }
	 261  func (h *headerGNU) GroupName() []byte	 { return h[297:][:32] }
	 262  func (h *headerGNU) DevMajor() []byte		{ return h[329:][:8] }
	 263  func (h *headerGNU) DevMinor() []byte		{ return h[337:][:8] }
	 264  func (h *headerGNU) AccessTime() []byte	{ return h[345:][:12] }
	 265  func (h *headerGNU) ChangeTime() []byte	{ return h[357:][:12] }
	 266  func (h *headerGNU) Sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) }
	 267  func (h *headerGNU) RealSize() []byte		{ return h[483:][:12] }
	 268  
	 269  type headerSTAR [blockSize]byte
	 270  
	 271  func (h *headerSTAR) V7() *headerV7			{ return (*headerV7)(h) }
	 272  func (h *headerSTAR) Magic() []byte			{ return h[257:][:6] }
	 273  func (h *headerSTAR) Version() []byte		{ return h[263:][:2] }
	 274  func (h *headerSTAR) UserName() []byte	 { return h[265:][:32] }
	 275  func (h *headerSTAR) GroupName() []byte	{ return h[297:][:32] }
	 276  func (h *headerSTAR) DevMajor() []byte	 { return h[329:][:8] }
	 277  func (h *headerSTAR) DevMinor() []byte	 { return h[337:][:8] }
	 278  func (h *headerSTAR) Prefix() []byte		 { return h[345:][:131] }
	 279  func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
	 280  func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
	 281  func (h *headerSTAR) Trailer() []byte		{ return h[508:][:4] }
	 282  
	 283  type headerUSTAR [blockSize]byte
	 284  
	 285  func (h *headerUSTAR) V7() *headerV7		 { return (*headerV7)(h) }
	 286  func (h *headerUSTAR) Magic() []byte		 { return h[257:][:6] }
	 287  func (h *headerUSTAR) Version() []byte	 { return h[263:][:2] }
	 288  func (h *headerUSTAR) UserName() []byte	{ return h[265:][:32] }
	 289  func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
	 290  func (h *headerUSTAR) DevMajor() []byte	{ return h[329:][:8] }
	 291  func (h *headerUSTAR) DevMinor() []byte	{ return h[337:][:8] }
	 292  func (h *headerUSTAR) Prefix() []byte		{ return h[345:][:155] }
	 293  
	 294  type sparseArray []byte
	 295  
	 296  func (s sparseArray) Entry(i int) sparseElem { return sparseElem(s[i*24:]) }
	 297  func (s sparseArray) IsExtended() []byte		 { return s[24*s.MaxEntries():][:1] }
	 298  func (s sparseArray) MaxEntries() int				{ return len(s) / 24 }
	 299  
	 300  type sparseElem []byte
	 301  
	 302  func (s sparseElem) Offset() []byte { return s[00:][:12] }
	 303  func (s sparseElem) Length() []byte { return s[12:][:12] }
	 304  

View as plain text