...

Source file src/archive/tar/strconv.go

Documentation: archive/tar

		 1  // Copyright 2016 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package tar
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"fmt"
		10  	"strconv"
		11  	"strings"
		12  	"time"
		13  )
		14  
		15  // hasNUL reports whether the NUL character exists within s.
		16  func hasNUL(s string) bool {
		17  	return strings.IndexByte(s, 0) >= 0
		18  }
		19  
		20  // isASCII reports whether the input is an ASCII C-style string.
		21  func isASCII(s string) bool {
		22  	for _, c := range s {
		23  		if c >= 0x80 || c == 0x00 {
		24  			return false
		25  		}
		26  	}
		27  	return true
		28  }
		29  
		30  // toASCII converts the input to an ASCII C-style string.
		31  // This is a best effort conversion, so invalid characters are dropped.
		32  func toASCII(s string) string {
		33  	if isASCII(s) {
		34  		return s
		35  	}
		36  	b := make([]byte, 0, len(s))
		37  	for _, c := range s {
		38  		if c < 0x80 && c != 0x00 {
		39  			b = append(b, byte(c))
		40  		}
		41  	}
		42  	return string(b)
		43  }
		44  
		45  type parser struct {
		46  	err error // Last error seen
		47  }
		48  
		49  type formatter struct {
		50  	err error // Last error seen
		51  }
		52  
		53  // parseString parses bytes as a NUL-terminated C-style string.
		54  // If a NUL byte is not found then the whole slice is returned as a string.
		55  func (*parser) parseString(b []byte) string {
		56  	if i := bytes.IndexByte(b, 0); i >= 0 {
		57  		return string(b[:i])
		58  	}
		59  	return string(b)
		60  }
		61  
		62  // formatString copies s into b, NUL-terminating if possible.
		63  func (f *formatter) formatString(b []byte, s string) {
		64  	if len(s) > len(b) {
		65  		f.err = ErrFieldTooLong
		66  	}
		67  	copy(b, s)
		68  	if len(s) < len(b) {
		69  		b[len(s)] = 0
		70  	}
		71  
		72  	// Some buggy readers treat regular files with a trailing slash
		73  	// in the V7 path field as a directory even though the full path
		74  	// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
		75  	if len(s) > len(b) && b[len(b)-1] == '/' {
		76  		n := len(strings.TrimRight(s[:len(b)], "/"))
		77  		b[n] = 0 // Replace trailing slash with NUL terminator
		78  	}
		79  }
		80  
		81  // fitsInBase256 reports whether x can be encoded into n bytes using base-256
		82  // encoding. Unlike octal encoding, base-256 encoding does not require that the
		83  // string ends with a NUL character. Thus, all n bytes are available for output.
		84  //
		85  // If operating in binary mode, this assumes strict GNU binary mode; which means
		86  // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
		87  // equivalent to the sign bit in two's complement form.
		88  func fitsInBase256(n int, x int64) bool {
		89  	binBits := uint(n-1) * 8
		90  	return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
		91  }
		92  
		93  // parseNumeric parses the input as being encoded in either base-256 or octal.
		94  // This function may return negative numbers.
		95  // If parsing fails or an integer overflow occurs, err will be set.
		96  func (p *parser) parseNumeric(b []byte) int64 {
		97  	// Check for base-256 (binary) format first.
		98  	// If the first bit is set, then all following bits constitute a two's
		99  	// complement encoded number in big-endian byte order.
	 100  	if len(b) > 0 && b[0]&0x80 != 0 {
	 101  		// Handling negative numbers relies on the following identity:
	 102  		//	-a-1 == ^a
	 103  		//
	 104  		// If the number is negative, we use an inversion mask to invert the
	 105  		// data bytes and treat the value as an unsigned number.
	 106  		var inv byte // 0x00 if positive or zero, 0xff if negative
	 107  		if b[0]&0x40 != 0 {
	 108  			inv = 0xff
	 109  		}
	 110  
	 111  		var x uint64
	 112  		for i, c := range b {
	 113  			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
	 114  			if i == 0 {
	 115  				c &= 0x7f // Ignore signal bit in first byte
	 116  			}
	 117  			if (x >> 56) > 0 {
	 118  				p.err = ErrHeader // Integer overflow
	 119  				return 0
	 120  			}
	 121  			x = x<<8 | uint64(c)
	 122  		}
	 123  		if (x >> 63) > 0 {
	 124  			p.err = ErrHeader // Integer overflow
	 125  			return 0
	 126  		}
	 127  		if inv == 0xff {
	 128  			return ^int64(x)
	 129  		}
	 130  		return int64(x)
	 131  	}
	 132  
	 133  	// Normal case is base-8 (octal) format.
	 134  	return p.parseOctal(b)
	 135  }
	 136  
	 137  // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
	 138  // Otherwise it will attempt to use base-256 (binary) encoding.
	 139  func (f *formatter) formatNumeric(b []byte, x int64) {
	 140  	if fitsInOctal(len(b), x) {
	 141  		f.formatOctal(b, x)
	 142  		return
	 143  	}
	 144  
	 145  	if fitsInBase256(len(b), x) {
	 146  		for i := len(b) - 1; i >= 0; i-- {
	 147  			b[i] = byte(x)
	 148  			x >>= 8
	 149  		}
	 150  		b[0] |= 0x80 // Highest bit indicates binary format
	 151  		return
	 152  	}
	 153  
	 154  	f.formatOctal(b, 0) // Last resort, just write zero
	 155  	f.err = ErrFieldTooLong
	 156  }
	 157  
	 158  func (p *parser) parseOctal(b []byte) int64 {
	 159  	// Because unused fields are filled with NULs, we need
	 160  	// to skip leading NULs. Fields may also be padded with
	 161  	// spaces or NULs.
	 162  	// So we remove leading and trailing NULs and spaces to
	 163  	// be sure.
	 164  	b = bytes.Trim(b, " \x00")
	 165  
	 166  	if len(b) == 0 {
	 167  		return 0
	 168  	}
	 169  	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
	 170  	if perr != nil {
	 171  		p.err = ErrHeader
	 172  	}
	 173  	return int64(x)
	 174  }
	 175  
	 176  func (f *formatter) formatOctal(b []byte, x int64) {
	 177  	if !fitsInOctal(len(b), x) {
	 178  		x = 0 // Last resort, just write zero
	 179  		f.err = ErrFieldTooLong
	 180  	}
	 181  
	 182  	s := strconv.FormatInt(x, 8)
	 183  	// Add leading zeros, but leave room for a NUL.
	 184  	if n := len(b) - len(s) - 1; n > 0 {
	 185  		s = strings.Repeat("0", n) + s
	 186  	}
	 187  	f.formatString(b, s)
	 188  }
	 189  
	 190  // fitsInOctal reports whether the integer x fits in a field n-bytes long
	 191  // using octal encoding with the appropriate NUL terminator.
	 192  func fitsInOctal(n int, x int64) bool {
	 193  	octBits := uint(n-1) * 3
	 194  	return x >= 0 && (n >= 22 || x < 1<<octBits)
	 195  }
	 196  
	 197  // parsePAXTime takes a string of the form %d.%d as described in the PAX
	 198  // specification. Note that this implementation allows for negative timestamps,
	 199  // which is allowed for by the PAX specification, but not always portable.
	 200  func parsePAXTime(s string) (time.Time, error) {
	 201  	const maxNanoSecondDigits = 9
	 202  
	 203  	// Split string into seconds and sub-seconds parts.
	 204  	ss, sn := s, ""
	 205  	if pos := strings.IndexByte(s, '.'); pos >= 0 {
	 206  		ss, sn = s[:pos], s[pos+1:]
	 207  	}
	 208  
	 209  	// Parse the seconds.
	 210  	secs, err := strconv.ParseInt(ss, 10, 64)
	 211  	if err != nil {
	 212  		return time.Time{}, ErrHeader
	 213  	}
	 214  	if len(sn) == 0 {
	 215  		return time.Unix(secs, 0), nil // No sub-second values
	 216  	}
	 217  
	 218  	// Parse the nanoseconds.
	 219  	if strings.Trim(sn, "0123456789") != "" {
	 220  		return time.Time{}, ErrHeader
	 221  	}
	 222  	if len(sn) < maxNanoSecondDigits {
	 223  		sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
	 224  	} else {
	 225  		sn = sn[:maxNanoSecondDigits] // Right truncate
	 226  	}
	 227  	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
	 228  	if len(ss) > 0 && ss[0] == '-' {
	 229  		return time.Unix(secs, -1*nsecs), nil // Negative correction
	 230  	}
	 231  	return time.Unix(secs, nsecs), nil
	 232  }
	 233  
	 234  // formatPAXTime converts ts into a time of the form %d.%d as described in the
	 235  // PAX specification. This function is capable of negative timestamps.
	 236  func formatPAXTime(ts time.Time) (s string) {
	 237  	secs, nsecs := ts.Unix(), ts.Nanosecond()
	 238  	if nsecs == 0 {
	 239  		return strconv.FormatInt(secs, 10)
	 240  	}
	 241  
	 242  	// If seconds is negative, then perform correction.
	 243  	sign := ""
	 244  	if secs < 0 {
	 245  		sign = "-"						 // Remember sign
	 246  		secs = -(secs + 1)		 // Add a second to secs
	 247  		nsecs = -(nsecs - 1e9) // Take that second away from nsecs
	 248  	}
	 249  	return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
	 250  }
	 251  
	 252  // parsePAXRecord parses the input PAX record string into a key-value pair.
	 253  // If parsing is successful, it will slice off the currently read record and
	 254  // return the remainder as r.
	 255  func parsePAXRecord(s string) (k, v, r string, err error) {
	 256  	// The size field ends at the first space.
	 257  	sp := strings.IndexByte(s, ' ')
	 258  	if sp == -1 {
	 259  		return "", "", s, ErrHeader
	 260  	}
	 261  
	 262  	// Parse the first token as a decimal integer.
	 263  	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
	 264  	if perr != nil || n < 5 || int64(len(s)) < n {
	 265  		return "", "", s, ErrHeader
	 266  	}
	 267  
	 268  	afterSpace := int64(sp + 1)
	 269  	beforeLastNewLine := n - 1
	 270  	// In some cases, "length" was perhaps padded/malformed, and
	 271  	// trying to index past where the space supposedly is goes past
	 272  	// the end of the actual record.
	 273  	// For example:
	 274  	//		"0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319"
	 275  	//																	^		 ^
	 276  	//																	|		 |
	 277  	//																	|	afterSpace=35
	 278  	//																	|
	 279  	//													beforeLastNewLine=29
	 280  	// yet indexOf(firstSpace) MUST BE before endOfRecord.
	 281  	//
	 282  	// See https://golang.org/issues/40196.
	 283  	if afterSpace >= beforeLastNewLine {
	 284  		return "", "", s, ErrHeader
	 285  	}
	 286  
	 287  	// Extract everything between the space and the final newline.
	 288  	rec, nl, rem := s[afterSpace:beforeLastNewLine], s[beforeLastNewLine:n], s[n:]
	 289  	if nl != "\n" {
	 290  		return "", "", s, ErrHeader
	 291  	}
	 292  
	 293  	// The first equals separates the key from the value.
	 294  	eq := strings.IndexByte(rec, '=')
	 295  	if eq == -1 {
	 296  		return "", "", s, ErrHeader
	 297  	}
	 298  	k, v = rec[:eq], rec[eq+1:]
	 299  
	 300  	if !validPAXRecord(k, v) {
	 301  		return "", "", s, ErrHeader
	 302  	}
	 303  	return k, v, rem, nil
	 304  }
	 305  
	 306  // formatPAXRecord formats a single PAX record, prefixing it with the
	 307  // appropriate length.
	 308  func formatPAXRecord(k, v string) (string, error) {
	 309  	if !validPAXRecord(k, v) {
	 310  		return "", ErrHeader
	 311  	}
	 312  
	 313  	const padding = 3 // Extra padding for ' ', '=', and '\n'
	 314  	size := len(k) + len(v) + padding
	 315  	size += len(strconv.Itoa(size))
	 316  	record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
	 317  
	 318  	// Final adjustment if adding size field increased the record size.
	 319  	if len(record) != size {
	 320  		size = len(record)
	 321  		record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
	 322  	}
	 323  	return record, nil
	 324  }
	 325  
	 326  // validPAXRecord reports whether the key-value pair is valid where each
	 327  // record is formatted as:
	 328  //	"%d %s=%s\n" % (size, key, value)
	 329  //
	 330  // Keys and values should be UTF-8, but the number of bad writers out there
	 331  // forces us to be a more liberal.
	 332  // Thus, we only reject all keys with NUL, and only reject NULs in values
	 333  // for the PAX version of the USTAR string fields.
	 334  // The key must not contain an '=' character.
	 335  func validPAXRecord(k, v string) bool {
	 336  	if k == "" || strings.IndexByte(k, '=') >= 0 {
	 337  		return false
	 338  	}
	 339  	switch k {
	 340  	case paxPath, paxLinkpath, paxUname, paxGname:
	 341  		return !hasNUL(v)
	 342  	default:
	 343  		return !hasNUL(k)
	 344  	}
	 345  }
	 346  

View as plain text