url.go

Documentation: net/url

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package url parses URLs and implements query escaping.
		 6  package url
		 7  
		 8  // See RFC 3986. This package generally follows RFC 3986, except where
		 9  // it deviates for compatibility reasons. When sending changes, first
		10  // search old issues for history on decisions. Unit tests should also
		11  // contain references to issue numbers with details.
		12  
		13  import (
		14  	"errors"
		15  	"fmt"
		16  	"sort"
		17  	"strconv"
		18  	"strings"
		19  )
		20  
		// Error describes a failed URL operation. Op names the operation, URL is
// the URL it was applied to, and Err is the underlying cause.
type Error struct {
	Op  string
	URL string
	Err error
}

// Unwrap returns the wrapped error e.Err.
func (e *Error) Unwrap() error {
	return e.Err
}

// Error renders the failure as: Op, the quoted URL, a colon, then Err.
func (e *Error) Error() string {
	return fmt.Sprintf("%s %q: %s", e.Op, e.URL, e.Err)
}

// Timeout reports whether the wrapped error has a Timeout method
// that returns true.
func (e *Error) Timeout() bool {
	type timeouter interface {
		Timeout() bool
	}
	if te, ok := e.Err.(timeouter); ok {
		return te.Timeout()
	}
	return false
}

// Temporary reports whether the wrapped error has a Temporary method
// that returns true.
func (e *Error) Temporary() bool {
	type temporary interface {
		Temporary() bool
	}
	if te, ok := e.Err.(temporary); ok {
		return te.Temporary()
	}
	return false
}
		44  
		// upperhex holds the sixteen uppercase hexadecimal digits; indexing it
// with a value in 0..15 yields that value's digit.
const upperhex = "0123456789ABCDEF"
		46  
		// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
		58  
		// unhex returns the numeric value (0..15) of the hexadecimal digit c.
// It returns 0 when c is not a hexadecimal digit.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
		70  
		// encoding selects which part of a URL is being escaped or unescaped.
type encoding int

// The encoding modes, numbered consecutively starting at 1.
const (
	encodePath encoding = iota + 1
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
)
		82  
		// EscapeError is the error type for a malformed percent-escape; its
// value is the offending text.
type EscapeError string

// Error returns the message "invalid URL escape " followed by the quoted
// offending text.
func (e EscapeError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}
		88  
		// InvalidHostError is the error type for a character that is not allowed
// in a host name; its value is the offending text.
type InvalidHostError string

// Error returns a message naming the quoted offending text.
func (e InvalidHostError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid character " + quoted + " in host name"
}
		94  
		95  // Return true if the specified character should be escaped when
		96  // appearing in a URL string, according to RFC 3986.
		97  //
		98  // Please be informed that for now shouldEscape does not check all
		99  // reserved characters correctly. See golang.org/issue/5684.
	 100  func shouldEscape(c byte, mode encoding) bool {
	 101  	// §2.3 Unreserved characters (alphanum)
	 102  	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
	 103  		return false
	 104  	}
	 105  
	 106  	if mode == encodeHost || mode == encodeZone {
	 107  		// §3.2.2 Host allows
	 108  		//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 109  		// as part of reg-name.
	 110  		// We add : because we include :port as part of host.
	 111  		// We add [ ] because we include [ipv6]:port as part of host.
	 112  		// We add < > because they're the only characters left that
	 113  		// we could possibly allow, and Parse will reject them if we
	 114  		// escape them (because hosts can't use %-encoding for
	 115  		// ASCII bytes).
	 116  		switch c {
	 117  		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
	 118  			return false
	 119  		}
	 120  	}
	 121  
	 122  	switch c {
	 123  	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
	 124  		return false
	 125  
	 126  	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
	 127  		// Different sections of the URL allow a few of
	 128  		// the reserved characters to appear unescaped.
	 129  		switch mode {
	 130  		case encodePath: // §3.3
	 131  			// The RFC allows : @ & = + $ but saves / ; , for assigning
	 132  			// meaning to individual path segments. This package
	 133  			// only manipulates the path as a whole, so we allow those
	 134  			// last three as well. That leaves only ? to escape.
	 135  			return c == '?'
	 136  
	 137  		case encodePathSegment: // §3.3
	 138  			// The RFC allows : @ & = + $ but saves / ; , for assigning
	 139  			// meaning to individual path segments.
	 140  			return c == '/' || c == ';' || c == ',' || c == '?'
	 141  
	 142  		case encodeUserPassword: // §3.2.1
	 143  			// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
	 144  			// userinfo, so we must escape only '@', '/', and '?'.
	 145  			// The parsing of userinfo treats ':' as special so we must escape
	 146  			// that too.
	 147  			return c == '@' || c == '/' || c == '?' || c == ':'
	 148  
	 149  		case encodeQueryComponent: // §3.4
	 150  			// The RFC reserves (so we must escape) everything.
	 151  			return true
	 152  
	 153  		case encodeFragment: // §4.1
	 154  			// The RFC text is silent but the grammar allows
	 155  			// everything, so escape nothing.
	 156  			return false
	 157  		}
	 158  	}
	 159  
	 160  	if mode == encodeFragment {
	 161  		// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
	 162  		// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
	 163  		// need to be escaped. To minimize potential breakage, we apply two restrictions:
	 164  		// (1) we always escape sub-delims outside of the fragment, and (2) we always
	 165  		// escape single quote to avoid breaking callers that had previously assumed that
	 166  		// single quotes would be escaped. See issue #19917.
	 167  		switch c {
	 168  		case '!', '(', ')', '*':
	 169  			return false
	 170  		}
	 171  	}
	 172  
	 173  	// Everything else must be escaped.
	 174  	return true
	 175  }
	 176  
	 177  // QueryUnescape does the inverse transformation of QueryEscape,
	 178  // converting each 3-byte encoded substring of the form "%AB" into the
	 179  // hex-decoded byte 0xAB.
	 180  // It returns an error if any % is not followed by two hexadecimal
	 181  // digits.
	 182  func QueryUnescape(s string) (string, error) {
	 183  	return unescape(s, encodeQueryComponent)
	 184  }
	 185  
	 186  // PathUnescape does the inverse transformation of PathEscape,
	 187  // converting each 3-byte encoded substring of the form "%AB" into the
	 188  // hex-decoded byte 0xAB. It returns an error if any % is not followed
	 189  // by two hexadecimal digits.
	 190  //
	 191  // PathUnescape is identical to QueryUnescape except that it does not
	 192  // unescape '+' to ' ' (space).
	 193  func PathUnescape(s string) (string, error) {
	 194  	return unescape(s, encodePathSegment)
	 195  }
	 196  
	 197  // unescape unescapes a string; the mode specifies
	 198  // which section of the URL string is being unescaped.
	 199  func unescape(s string, mode encoding) (string, error) {
	 200  	// Count %, check that they're well-formed.
	 201  	n := 0
	 202  	hasPlus := false
	 203  	for i := 0; i < len(s); {
	 204  		switch s[i] {
	 205  		case '%':
	 206  			n++
	 207  			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
	 208  				s = s[i:]
	 209  				if len(s) > 3 {
	 210  					s = s[:3]
	 211  				}
	 212  				return "", EscapeError(s)
	 213  			}
	 214  			// Per https://tools.ietf.org/html/rfc3986#page-21
	 215  			// in the host component %-encoding can only be used
	 216  			// for non-ASCII bytes.
	 217  			// But https://tools.ietf.org/html/rfc6874#section-2
	 218  			// introduces %25 being allowed to escape a percent sign
	 219  			// in IPv6 scoped-address literals. Yay.
	 220  			if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
	 221  				return "", EscapeError(s[i : i+3])
	 222  			}
	 223  			if mode == encodeZone {
	 224  				// RFC 6874 says basically "anything goes" for zone identifiers
	 225  				// and that even non-ASCII can be redundantly escaped,
	 226  				// but it seems prudent to restrict %-escaped bytes here to those
	 227  				// that are valid host name bytes in their unescaped form.
	 228  				// That is, you can use escaping in the zone identifier but not
	 229  				// to introduce bytes you couldn't just write directly.
	 230  				// But Windows puts spaces here! Yay.
	 231  				v := unhex(s[i+1])<<4 | unhex(s[i+2])
	 232  				if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
	 233  					return "", EscapeError(s[i : i+3])
	 234  				}
	 235  			}
	 236  			i += 3
	 237  		case '+':
	 238  			hasPlus = mode == encodeQueryComponent
	 239  			i++
	 240  		default:
	 241  			if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
	 242  				return "", InvalidHostError(s[i : i+1])
	 243  			}
	 244  			i++
	 245  		}
	 246  	}
	 247  
	 248  	if n == 0 && !hasPlus {
	 249  		return s, nil
	 250  	}
	 251  
	 252  	var t strings.Builder
	 253  	t.Grow(len(s) - 2*n)
	 254  	for i := 0; i < len(s); i++ {
	 255  		switch s[i] {
	 256  		case '%':
	 257  			t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
	 258  			i += 2
	 259  		case '+':
	 260  			if mode == encodeQueryComponent {
	 261  				t.WriteByte(' ')
	 262  			} else {
	 263  				t.WriteByte('+')
	 264  			}
	 265  		default:
	 266  			t.WriteByte(s[i])
	 267  		}
	 268  	}
	 269  	return t.String(), nil
	 270  }
	 271  
	 272  // QueryEscape escapes the string so it can be safely placed
	 273  // inside a URL query.
	 274  func QueryEscape(s string) string {
	 275  	return escape(s, encodeQueryComponent)
	 276  }
	 277  
	 278  // PathEscape escapes the string so it can be safely placed inside a URL path segment,
	 279  // replacing special characters (including /) with %XX sequences as needed.
	 280  func PathEscape(s string) string {
	 281  	return escape(s, encodePathSegment)
	 282  }
	 283  
	 284  func escape(s string, mode encoding) string {
	 285  	spaceCount, hexCount := 0, 0
	 286  	for i := 0; i < len(s); i++ {
	 287  		c := s[i]
	 288  		if shouldEscape(c, mode) {
	 289  			if c == ' ' && mode == encodeQueryComponent {
	 290  				spaceCount++
	 291  			} else {
	 292  				hexCount++
	 293  			}
	 294  		}
	 295  	}
	 296  
	 297  	if spaceCount == 0 && hexCount == 0 {
	 298  		return s
	 299  	}
	 300  
	 301  	var buf [64]byte
	 302  	var t []byte
	 303  
	 304  	required := len(s) + 2*hexCount
	 305  	if required <= len(buf) {
	 306  		t = buf[:required]
	 307  	} else {
	 308  		t = make([]byte, required)
	 309  	}
	 310  
	 311  	if hexCount == 0 {
	 312  		copy(t, s)
	 313  		for i := 0; i < len(s); i++ {
	 314  			if s[i] == ' ' {
	 315  				t[i] = '+'
	 316  			}
	 317  		}
	 318  		return string(t)
	 319  	}
	 320  
	 321  	j := 0
	 322  	for i := 0; i < len(s); i++ {
	 323  		switch c := s[i]; {
	 324  		case c == ' ' && mode == encodeQueryComponent:
	 325  			t[j] = '+'
	 326  			j++
	 327  		case shouldEscape(c, mode):
	 328  			t[j] = '%'
	 329  			t[j+1] = upperhex[c>>4]
	 330  			t[j+2] = upperhex[c&15]
	 331  			j += 3
	 332  		default:
	 333  			t[j] = s[i]
	 334  			j++
	 335  		}
	 336  	}
	 337  	return string(t)
	 338  }
	 339  
	 340  // A URL represents a parsed URL (technically, a URI reference).
	 341  //
	 342  // The general form represented is:
	 343  //
	 344  //	[scheme:][//[userinfo@]host][/]path[?query][#fragment]
	 345  //
	 346  // URLs that do not start with a slash after the scheme are interpreted as:
	 347  //
	 348  //	scheme:opaque[?query][#fragment]
	 349  //
	 350  // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
	 351  // A consequence is that it is impossible to tell which slashes in the Path were
	 352  // slashes in the raw URL and which were %2f. This distinction is rarely important,
	 353  // but when it is, the code should use RawPath, an optional field which only gets
	 354  // set if the default encoding is different from Path.
	 355  //
	 356  // URL's String method uses the EscapedPath method to obtain the path. See the
	 357  // EscapedPath method for more details.
	 358  type URL struct {
	 359  	Scheme			string
	 360  	Opaque			string		// encoded opaque data
	 361  	User				*Userinfo // username and password information
	 362  	Host				string		// host or host:port
	 363  	Path				string		// path (relative paths may omit leading slash)
	 364  	RawPath		 string		// encoded path hint (see EscapedPath method)
	 365  	ForceQuery	bool			// append a query ('?') even if RawQuery is empty
	 366  	RawQuery		string		// encoded query values, without '?'
	 367  	Fragment		string		// fragment for references, without '#'
	 368  	RawFragment string		// encoded fragment hint (see EscapedFragment method)
	 369  }
	 370  
	 // User returns a Userinfo holding the given username and no password.
func User(username string) *Userinfo {
	return &Userinfo{username: username}
}

// UserPassword returns a Userinfo holding the given username and
// password.
//
// This functionality should only be used with legacy web sites.
// RFC 2396 warns that interpreting Userinfo this way
// "is NOT RECOMMENDED, because the passing of authentication
// information in clear text (such as URI) has proven to be a
// security risk in almost every case where it has been used."
func UserPassword(username, password string) *Userinfo {
	return &Userinfo{
		username:    username,
		password:    password,
		passwordSet: true,
	}
}

// Userinfo is an immutable holder of the username and password details
// of a URL. An existing Userinfo always has a username (possibly empty,
// as allowed by RFC 2396) and may also have a password.
type Userinfo struct {
	username    string
	password    string
	passwordSet bool
}

// Username returns the username. A nil receiver yields "".
func (u *Userinfo) Username() string {
	if u != nil {
		return u.username
	}
	return ""
}

// Password returns the password and whether one is set. A nil receiver
// yields "" and false.
func (u *Userinfo) Password() (string, bool) {
	if u != nil {
		return u.password, u.passwordSet
	}
	return "", false
}
	 414  
	 415  // String returns the encoded userinfo information in the standard form
	 416  // of "username[:password]".
	 417  func (u *Userinfo) String() string {
	 418  	if u == nil {
	 419  		return ""
	 420  	}
	 421  	s := escape(u.username, encodeUserPassword)
	 422  	if u.passwordSet {
	 423  		s += ":" + escape(u.password, encodeUserPassword)
	 424  	}
	 425  	return s
	 426  }
	 427  
	 // getScheme checks whether rawURL has the form scheme:path, where scheme
// matches [a-zA-Z][a-zA-Z0-9+-.]*. If so it returns scheme and path;
// otherwise it returns "" and rawURL. A rawURL that starts with ':'
// yields a "missing protocol scheme" error.
func getScheme(rawURL string) (scheme, path string, err error) {
	for i := 0; i < len(rawURL); i++ {
		c := rawURL[i]

		if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') {
			continue
		}

		// Digits and "+-." are valid anywhere except the first position.
		tail := ('0' <= c && c <= '9') || c == '+' || c == '-' || c == '.'
		if tail && i > 0 {
			continue
		}

		if c == ':' {
			if i == 0 {
				return "", "", errors.New("missing protocol scheme")
			}
			return rawURL[:i], rawURL[i+1:], nil
		}

		// Either an invalid character or a leading digit/"+-.":
		// there is no valid scheme.
		return "", rawURL, nil
	}
	return "", rawURL, nil
}
	 454  
	 // split cuts s in two at the first occurrence of sep. When cutc is true
// sep itself is dropped; otherwise it stays at the start of the second
// part. If sep does not occur in s, split returns s and "".
func split(s string, sep byte, cutc bool) (string, string) {
	idx := strings.IndexByte(s, sep)
	switch {
	case idx < 0:
		return s, ""
	case cutc:
		return s[:idx], s[idx+1:]
	default:
		return s[:idx], s[idx:]
	}
}
	 468  
	 469  // Parse parses a raw url into a URL structure.
	 470  //
	 471  // The url may be relative (a path, without a host) or absolute
	 472  // (starting with a scheme). Trying to parse a hostname and path
	 473  // without a scheme is invalid but may not necessarily return an
	 474  // error, due to parsing ambiguities.
	 475  func Parse(rawURL string) (*URL, error) {
	 476  	// Cut off #frag
	 477  	u, frag := split(rawURL, '#', true)
	 478  	url, err := parse(u, false)
	 479  	if err != nil {
	 480  		return nil, &Error{"parse", u, err}
	 481  	}
	 482  	if frag == "" {
	 483  		return url, nil
	 484  	}
	 485  	if err = url.setFragment(frag); err != nil {
	 486  		return nil, &Error{"parse", rawURL, err}
	 487  	}
	 488  	return url, nil
	 489  }
	 490  
	 491  // ParseRequestURI parses a raw url into a URL structure. It assumes that
	 492  // url was received in an HTTP request, so the url is interpreted
	 493  // only as an absolute URI or an absolute path.
	 494  // The string url is assumed not to have a #fragment suffix.
	 495  // (Web browsers strip #fragment before sending the URL to a web server.)
	 496  func ParseRequestURI(rawURL string) (*URL, error) {
	 497  	url, err := parse(rawURL, true)
	 498  	if err != nil {
	 499  		return nil, &Error{"parse", rawURL, err}
	 500  	}
	 501  	return url, nil
	 502  }
	 503  
	 504  // parse parses a URL from a string in one of two contexts. If
	 505  // viaRequest is true, the URL is assumed to have arrived via an HTTP request,
	 506  // in which case only absolute URLs or path-absolute relative URLs are allowed.
	 507  // If viaRequest is false, all forms of relative URLs are allowed.
	 508  func parse(rawURL string, viaRequest bool) (*URL, error) {
	 509  	var rest string
	 510  	var err error
	 511  
	 512  	if stringContainsCTLByte(rawURL) {
	 513  		return nil, errors.New("net/url: invalid control character in URL")
	 514  	}
	 515  
	 516  	if rawURL == "" && viaRequest {
	 517  		return nil, errors.New("empty url")
	 518  	}
	 519  	url := new(URL)
	 520  
	 521  	if rawURL == "*" {
	 522  		url.Path = "*"
	 523  		return url, nil
	 524  	}
	 525  
	 526  	// Split off possible leading "http:", "mailto:", etc.
	 527  	// Cannot contain escaped characters.
	 528  	if url.Scheme, rest, err = getScheme(rawURL); err != nil {
	 529  		return nil, err
	 530  	}
	 531  	url.Scheme = strings.ToLower(url.Scheme)
	 532  
	 533  	if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 {
	 534  		url.ForceQuery = true
	 535  		rest = rest[:len(rest)-1]
	 536  	} else {
	 537  		rest, url.RawQuery = split(rest, '?', true)
	 538  	}
	 539  
	 540  	if !strings.HasPrefix(rest, "/") {
	 541  		if url.Scheme != "" {
	 542  			// We consider rootless paths per RFC 3986 as opaque.
	 543  			url.Opaque = rest
	 544  			return url, nil
	 545  		}
	 546  		if viaRequest {
	 547  			return nil, errors.New("invalid URI for request")
	 548  		}
	 549  
	 550  		// Avoid confusion with malformed schemes, like cache_object:foo/bar.
	 551  		// See golang.org/issue/16822.
	 552  		//
	 553  		// RFC 3986, §3.3:
	 554  		// In addition, a URI reference (Section 4.1) may be a relative-path reference,
	 555  		// in which case the first path segment cannot contain a colon (":") character.
	 556  		colon := strings.Index(rest, ":")
	 557  		slash := strings.Index(rest, "/")
	 558  		if colon >= 0 && (slash < 0 || colon < slash) {
	 559  			// First path segment has colon. Not allowed in relative URL.
	 560  			return nil, errors.New("first path segment in URL cannot contain colon")
	 561  		}
	 562  	}
	 563  
	 564  	if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
	 565  		var authority string
	 566  		authority, rest = split(rest[2:], '/', false)
	 567  		url.User, url.Host, err = parseAuthority(authority)
	 568  		if err != nil {
	 569  			return nil, err
	 570  		}
	 571  	}
	 572  	// Set Path and, optionally, RawPath.
	 573  	// RawPath is a hint of the encoding of Path. We don't want to set it if
	 574  	// the default escaping of Path is equivalent, to help make sure that people
	 575  	// don't rely on it in general.
	 576  	if err := url.setPath(rest); err != nil {
	 577  		return nil, err
	 578  	}
	 579  	return url, nil
	 580  }
	 581  
	 582  func parseAuthority(authority string) (user *Userinfo, host string, err error) {
	 583  	i := strings.LastIndex(authority, "@")
	 584  	if i < 0 {
	 585  		host, err = parseHost(authority)
	 586  	} else {
	 587  		host, err = parseHost(authority[i+1:])
	 588  	}
	 589  	if err != nil {
	 590  		return nil, "", err
	 591  	}
	 592  	if i < 0 {
	 593  		return nil, host, nil
	 594  	}
	 595  	userinfo := authority[:i]
	 596  	if !validUserinfo(userinfo) {
	 597  		return nil, "", errors.New("net/url: invalid userinfo")
	 598  	}
	 599  	if !strings.Contains(userinfo, ":") {
	 600  		if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
	 601  			return nil, "", err
	 602  		}
	 603  		user = User(userinfo)
	 604  	} else {
	 605  		username, password := split(userinfo, ':', true)
	 606  		if username, err = unescape(username, encodeUserPassword); err != nil {
	 607  			return nil, "", err
	 608  		}
	 609  		if password, err = unescape(password, encodeUserPassword); err != nil {
	 610  			return nil, "", err
	 611  		}
	 612  		user = UserPassword(username, password)
	 613  	}
	 614  	return user, host, nil
	 615  }
	 616  
	 617  // parseHost parses host as an authority without user
	 618  // information. That is, as host[:port].
	 619  func parseHost(host string) (string, error) {
	 620  	if strings.HasPrefix(host, "[") {
	 621  		// Parse an IP-Literal in RFC 3986 and RFC 6874.
	 622  		// E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
	 623  		i := strings.LastIndex(host, "]")
	 624  		if i < 0 {
	 625  			return "", errors.New("missing ']' in host")
	 626  		}
	 627  		colonPort := host[i+1:]
	 628  		if !validOptionalPort(colonPort) {
	 629  			return "", fmt.Errorf("invalid port %q after host", colonPort)
	 630  		}
	 631  
	 632  		// RFC 6874 defines that %25 (%-encoded percent) introduces
	 633  		// the zone identifier, and the zone identifier can use basically
	 634  		// any %-encoding it likes. That's different from the host, which
	 635  		// can only %-encode non-ASCII bytes.
	 636  		// We do impose some restrictions on the zone, to avoid stupidity
	 637  		// like newlines.
	 638  		zone := strings.Index(host[:i], "%25")
	 639  		if zone >= 0 {
	 640  			host1, err := unescape(host[:zone], encodeHost)
	 641  			if err != nil {
	 642  				return "", err
	 643  			}
	 644  			host2, err := unescape(host[zone:i], encodeZone)
	 645  			if err != nil {
	 646  				return "", err
	 647  			}
	 648  			host3, err := unescape(host[i:], encodeHost)
	 649  			if err != nil {
	 650  				return "", err
	 651  			}
	 652  			return host1 + host2 + host3, nil
	 653  		}
	 654  	} else if i := strings.LastIndex(host, ":"); i != -1 {
	 655  		colonPort := host[i:]
	 656  		if !validOptionalPort(colonPort) {
	 657  			return "", fmt.Errorf("invalid port %q after host", colonPort)
	 658  		}
	 659  	}
	 660  
	 661  	var err error
	 662  	if host, err = unescape(host, encodeHost); err != nil {
	 663  		return "", err
	 664  	}
	 665  	return host, nil
	 666  }
	 667  
	 668  // setPath sets the Path and RawPath fields of the URL based on the provided
	 669  // escaped path p. It maintains the invariant that RawPath is only specified
	 670  // when it differs from the default encoding of the path.
	 671  // For example:
	 672  // - setPath("/foo/bar")	 will set Path="/foo/bar" and RawPath=""
	 673  // - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar"
	 674  // setPath will return an error only if the provided path contains an invalid
	 675  // escaping.
	 676  func (u *URL) setPath(p string) error {
	 677  	path, err := unescape(p, encodePath)
	 678  	if err != nil {
	 679  		return err
	 680  	}
	 681  	u.Path = path
	 682  	if escp := escape(path, encodePath); p == escp {
	 683  		// Default encoding is fine.
	 684  		u.RawPath = ""
	 685  	} else {
	 686  		u.RawPath = p
	 687  	}
	 688  	return nil
	 689  }
	 690  
	 691  // EscapedPath returns the escaped form of u.Path.
	 692  // In general there are multiple possible escaped forms of any path.
	 693  // EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
	 694  // Otherwise EscapedPath ignores u.RawPath and computes an escaped
	 695  // form on its own.
	 696  // The String and RequestURI methods use EscapedPath to construct
	 697  // their results.
	 698  // In general, code should call EscapedPath instead of
	 699  // reading u.RawPath directly.
	 700  func (u *URL) EscapedPath() string {
	 701  	if u.RawPath != "" && validEncoded(u.RawPath, encodePath) {
	 702  		p, err := unescape(u.RawPath, encodePath)
	 703  		if err == nil && p == u.Path {
	 704  			return u.RawPath
	 705  		}
	 706  	}
	 707  	if u.Path == "*" {
	 708  		return "*" // don't escape (Issue 11202)
	 709  	}
	 710  	return escape(u.Path, encodePath)
	 711  }
	 712  
	 713  // validEncoded reports whether s is a valid encoded path or fragment,
	 714  // according to mode.
	 715  // It must not contain any bytes that require escaping during encoding.
	 716  func validEncoded(s string, mode encoding) bool {
	 717  	for i := 0; i < len(s); i++ {
	 718  		// RFC 3986, Appendix A.
	 719  		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@".
	 720  		// shouldEscape is not quite compliant with the RFC,
	 721  		// so we check the sub-delims ourselves and let
	 722  		// shouldEscape handle the others.
	 723  		switch s[i] {
	 724  		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@':
	 725  			// ok
	 726  		case '[', ']':
	 727  			// ok - not specified in RFC 3986 but left alone by modern browsers
	 728  		case '%':
	 729  			// ok - percent encoded, will decode
	 730  		default:
	 731  			if shouldEscape(s[i], mode) {
	 732  				return false
	 733  			}
	 734  		}
	 735  	}
	 736  	return true
	 737  }
	 738  
	 739  // setFragment is like setPath but for Fragment/RawFragment.
	 740  func (u *URL) setFragment(f string) error {
	 741  	frag, err := unescape(f, encodeFragment)
	 742  	if err != nil {
	 743  		return err
	 744  	}
	 745  	u.Fragment = frag
	 746  	if escf := escape(frag, encodeFragment); f == escf {
	 747  		// Default encoding is fine.
	 748  		u.RawFragment = ""
	 749  	} else {
	 750  		u.RawFragment = f
	 751  	}
	 752  	return nil
	 753  }
	 754  
	 755  // EscapedFragment returns the escaped form of u.Fragment.
	 756  // In general there are multiple possible escaped forms of any fragment.
	 757  // EscapedFragment returns u.RawFragment when it is a valid escaping of u.Fragment.
	 758  // Otherwise EscapedFragment ignores u.RawFragment and computes an escaped
	 759  // form on its own.
	 760  // The String method uses EscapedFragment to construct its result.
	 761  // In general, code should call EscapedFragment instead of
	 762  // reading u.RawFragment directly.
	 763  func (u *URL) EscapedFragment() string {
	 764  	if u.RawFragment != "" && validEncoded(u.RawFragment, encodeFragment) {
	 765  		f, err := unescape(u.RawFragment, encodeFragment)
	 766  		if err == nil && f == u.Fragment {
	 767  			return u.RawFragment
	 768  		}
	 769  	}
	 770  	return escape(u.Fragment, encodeFragment)
	 771  }
	 772  
	 // validOptionalPort reports whether port is either the empty string
// or matches /^:\d*$/, i.e. a colon followed by zero or more ASCII
// digits.
func validOptionalPort(port string) bool {
	if port == "" {
		return true
	}
	if port[0] != ':' {
		return false
	}
	for i := 1; i < len(port); i++ {
		if c := port[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
	 789  
	 790  // String reassembles the URL into a valid URL string.
	 791  // The general form of the result is one of:
	 792  //
	 793  //	scheme:opaque?query#fragment
	 794  //	scheme://userinfo@host/path?query#fragment
	 795  //
	 796  // If u.Opaque is non-empty, String uses the first form;
	 797  // otherwise it uses the second form.
	 798  // Any non-ASCII characters in host are escaped.
	 799  // To obtain the path, String uses u.EscapedPath().
	 800  //
	 801  // In the second form, the following rules apply:
	 802  //	- if u.Scheme is empty, scheme: is omitted.
	 803  //	- if u.User is nil, userinfo@ is omitted.
	 804  //	- if u.Host is empty, host/ is omitted.
	 805  //	- if u.Scheme and u.Host are empty and u.User is nil,
	 806  //		 the entire scheme://userinfo@host/ is omitted.
	 807  //	- if u.Host is non-empty and u.Path begins with a /,
	 808  //		 the form host/path does not add its own /.
	 809  //	- if u.RawQuery is empty, ?query is omitted.
	 810  //	- if u.Fragment is empty, #fragment is omitted.
	 811  func (u *URL) String() string {
	 812  	var buf strings.Builder
	 813  	if u.Scheme != "" {
	 814  		buf.WriteString(u.Scheme)
	 815  		buf.WriteByte(':')
	 816  	}
	 817  	if u.Opaque != "" {
	 818  		buf.WriteString(u.Opaque)
	 819  	} else {
	 820  		if u.Scheme != "" || u.Host != "" || u.User != nil {
	 821  			if u.Host != "" || u.Path != "" || u.User != nil {
	 822  				buf.WriteString("//")
	 823  			}
	 824  			if ui := u.User; ui != nil {
	 825  				buf.WriteString(ui.String())
	 826  				buf.WriteByte('@')
	 827  			}
	 828  			if h := u.Host; h != "" {
	 829  				buf.WriteString(escape(h, encodeHost))
	 830  			}
	 831  		}
	 832  		path := u.EscapedPath()
	 833  		if path != "" && path[0] != '/' && u.Host != "" {
	 834  			buf.WriteByte('/')
	 835  		}
	 836  		if buf.Len() == 0 {
	 837  			// RFC 3986 §4.2
	 838  			// A path segment that contains a colon character (e.g., "this:that")
	 839  			// cannot be used as the first segment of a relative-path reference, as
	 840  			// it would be mistaken for a scheme name. Such a segment must be
	 841  			// preceded by a dot-segment (e.g., "./this:that") to make a relative-
	 842  			// path reference.
	 843  			if i := strings.IndexByte(path, ':'); i > -1 && strings.IndexByte(path[:i], '/') == -1 {
	 844  				buf.WriteString("./")
	 845  			}
	 846  		}
	 847  		buf.WriteString(path)
	 848  	}
	 849  	if u.ForceQuery || u.RawQuery != "" {
	 850  		buf.WriteByte('?')
	 851  		buf.WriteString(u.RawQuery)
	 852  	}
	 853  	if u.Fragment != "" {
	 854  		buf.WriteByte('#')
	 855  		buf.WriteString(u.EscapedFragment())
	 856  	}
	 857  	return buf.String()
	 858  }
	 859  
	 860  // Redacted is like String but replaces any password with "xxxxx".
	 861  // Only the password in u.URL is redacted.
	 862  func (u *URL) Redacted() string {
	 863  	if u == nil {
	 864  		return ""
	 865  	}
	 866  
	 867  	ru := *u
	 868  	if _, has := ru.User.Password(); has {
	 869  		ru.User = UserPassword(ru.User.Username(), "xxxxx")
	 870  	}
	 871  	return ru.String()
	 872  }
	 873  
	 // Values maps a string key to a list of values.
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get returns the first value associated with key, or the empty string
// if there are none. To access multiple values, use the map directly.
func (v Values) Get(key string) string {
	if v == nil {
		return ""
	}
	if vals := v[key]; len(vals) > 0 {
		return vals[0]
	}
	return ""
}

// Set sets key to the single value given, replacing any existing
// values.
func (v Values) Set(key, value string) {
	single := []string{value}
	v[key] = single
}

// Add appends value to the values already associated with key.
func (v Values) Add(key, value string) {
	existing := v[key]
	v[key] = append(existing, value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}

// Has reports whether key is set.
func (v Values) Has(key string) bool {
	_, present := v[key]
	return present
}
	 917  
	 918  // ParseQuery parses the URL-encoded query string and returns
	 919  // a map listing the values specified for each key.
	 920  // ParseQuery always returns a non-nil map containing all the
	 921  // valid query parameters found; err describes the first decoding error
	 922  // encountered, if any.
	 923  //
	 924  // Query is expected to be a list of key=value settings separated by ampersands.
	 925  // A setting without an equals sign is interpreted as a key set to an empty
	 926  // value.
	 927  // Settings containing a non-URL-encoded semicolon are considered invalid.
	 928  func ParseQuery(query string) (Values, error) {
	 929  	m := make(Values)
	 930  	err := parseQuery(m, query)
	 931  	return m, err
	 932  }
	 933  
	 934  func parseQuery(m Values, query string) (err error) {
	 935  	for query != "" {
	 936  		key := query
	 937  		if i := strings.IndexAny(key, "&"); i >= 0 {
	 938  			key, query = key[:i], key[i+1:]
	 939  		} else {
	 940  			query = ""
	 941  		}
	 942  		if strings.Contains(key, ";") {
	 943  			err = fmt.Errorf("invalid semicolon separator in query")
	 944  			continue
	 945  		}
	 946  		if key == "" {
	 947  			continue
	 948  		}
	 949  		value := ""
	 950  		if i := strings.Index(key, "="); i >= 0 {
	 951  			key, value = key[:i], key[i+1:]
	 952  		}
	 953  		key, err1 := QueryUnescape(key)
	 954  		if err1 != nil {
	 955  			if err == nil {
	 956  				err = err1
	 957  			}
	 958  			continue
	 959  		}
	 960  		value, err1 = QueryUnescape(value)
	 961  		if err1 != nil {
	 962  			if err == nil {
	 963  				err = err1
	 964  			}
	 965  			continue
	 966  		}
	 967  		m[key] = append(m[key], value)
	 968  	}
	 969  	return err
	 970  }
	 971  
	 972  // Encode encodes the values into ``URL encoded'' form
	 973  // ("bar=baz&foo=quux") sorted by key.
	 974  func (v Values) Encode() string {
	 975  	if v == nil {
	 976  		return ""
	 977  	}
	 978  	var buf strings.Builder
	 979  	keys := make([]string, 0, len(v))
	 980  	for k := range v {
	 981  		keys = append(keys, k)
	 982  	}
	 983  	sort.Strings(keys)
	 984  	for _, k := range keys {
	 985  		vs := v[k]
	 986  		keyEscaped := QueryEscape(k)
	 987  		for _, v := range vs {
	 988  			if buf.Len() > 0 {
	 989  				buf.WriteByte('&')
	 990  			}
	 991  			buf.WriteString(keyEscaped)
	 992  			buf.WriteByte('=')
	 993  			buf.WriteString(QueryEscape(v))
	 994  		}
	 995  	}
	 996  	return buf.String()
	 997  }
	 998  
	 999  // resolvePath applies special path segments from refs and applies
	1000  // them to base, per RFC 3986.
	1001  func resolvePath(base, ref string) string {
	1002  	var full string
	1003  	if ref == "" {
	1004  		full = base
	1005  	} else if ref[0] != '/' {
	1006  		i := strings.LastIndex(base, "/")
	1007  		full = base[:i+1] + ref
	1008  	} else {
	1009  		full = ref
	1010  	}
	1011  	if full == "" {
	1012  		return ""
	1013  	}
	1014  
	1015  	var (
	1016  		last string
	1017  		elem string
	1018  		i		int
	1019  		dst	strings.Builder
	1020  	)
	1021  	first := true
	1022  	remaining := full
	1023  	// We want to return a leading '/', so write it now.
	1024  	dst.WriteByte('/')
	1025  	for i >= 0 {
	1026  		i = strings.IndexByte(remaining, '/')
	1027  		if i < 0 {
	1028  			last, elem, remaining = remaining, remaining, ""
	1029  		} else {
	1030  			elem, remaining = remaining[:i], remaining[i+1:]
	1031  		}
	1032  		if elem == "." {
	1033  			first = false
	1034  			// drop
	1035  			continue
	1036  		}
	1037  
	1038  		if elem == ".." {
	1039  			// Ignore the leading '/' we already wrote.
	1040  			str := dst.String()[1:]
	1041  			index := strings.LastIndexByte(str, '/')
	1042  
	1043  			dst.Reset()
	1044  			dst.WriteByte('/')
	1045  			if index == -1 {
	1046  				first = true
	1047  			} else {
	1048  				dst.WriteString(str[:index])
	1049  			}
	1050  		} else {
	1051  			if !first {
	1052  				dst.WriteByte('/')
	1053  			}
	1054  			dst.WriteString(elem)
	1055  			first = false
	1056  		}
	1057  	}
	1058  
	1059  	if last == "." || last == ".." {
	1060  		dst.WriteByte('/')
	1061  	}
	1062  
	1063  	// We wrote an initial '/', but we don't want two.
	1064  	r := dst.String()
	1065  	if len(r) > 1 && r[1] == '/' {
	1066  		r = r[1:]
	1067  	}
	1068  	return r
	1069  }
	1070  
	1071  // IsAbs reports whether the URL is absolute.
	1072  // Absolute means that it has a non-empty scheme.
	1073  func (u *URL) IsAbs() bool {
	1074  	return u.Scheme != ""
	1075  }
	1076  
	1077  // Parse parses a URL in the context of the receiver. The provided URL
	1078  // may be relative or absolute. Parse returns nil, err on parse
	1079  // failure, otherwise its return value is the same as ResolveReference.
	1080  func (u *URL) Parse(ref string) (*URL, error) {
	1081  	refURL, err := Parse(ref)
	1082  	if err != nil {
	1083  		return nil, err
	1084  	}
	1085  	return u.ResolveReference(refURL), nil
	1086  }
	1087  
	1088  // ResolveReference resolves a URI reference to an absolute URI from
	1089  // an absolute base URI u, per RFC 3986 Section 5.2. The URI reference
	1090  // may be relative or absolute. ResolveReference always returns a new
	1091  // URL instance, even if the returned URL is identical to either the
	1092  // base or reference. If ref is an absolute URL, then ResolveReference
	1093  // ignores base and returns a copy of ref.
	1094  func (u *URL) ResolveReference(ref *URL) *URL {
	1095  	url := *ref
	1096  	if ref.Scheme == "" {
	1097  		url.Scheme = u.Scheme
	1098  	}
	1099  	if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
	1100  		// The "absoluteURI" or "net_path" cases.
	1101  		// We can ignore the error from setPath since we know we provided a
	1102  		// validly-escaped path.
	1103  		url.setPath(resolvePath(ref.EscapedPath(), ""))
	1104  		return &url
	1105  	}
	1106  	if ref.Opaque != "" {
	1107  		url.User = nil
	1108  		url.Host = ""
	1109  		url.Path = ""
	1110  		return &url
	1111  	}
	1112  	if ref.Path == "" && ref.RawQuery == "" {
	1113  		url.RawQuery = u.RawQuery
	1114  		if ref.Fragment == "" {
	1115  			url.Fragment = u.Fragment
	1116  			url.RawFragment = u.RawFragment
	1117  		}
	1118  	}
	1119  	// The "abs_path" or "rel_path" cases.
	1120  	url.Host = u.Host
	1121  	url.User = u.User
	1122  	url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath()))
	1123  	return &url
	1124  }
	1125  
	1126  // Query parses RawQuery and returns the corresponding values.
	1127  // It silently discards malformed value pairs.
	1128  // To check errors use ParseQuery.
	1129  func (u *URL) Query() Values {
	1130  	v, _ := ParseQuery(u.RawQuery)
	1131  	return v
	1132  }
	1133  
	1134  // RequestURI returns the encoded path?query or opaque?query
	1135  // string that would be used in an HTTP request for u.
	1136  func (u *URL) RequestURI() string {
	1137  	result := u.Opaque
	1138  	if result == "" {
	1139  		result = u.EscapedPath()
	1140  		if result == "" {
	1141  			result = "/"
	1142  		}
	1143  	} else {
	1144  		if strings.HasPrefix(result, "//") {
	1145  			result = u.Scheme + ":" + result
	1146  		}
	1147  	}
	1148  	if u.ForceQuery || u.RawQuery != "" {
	1149  		result += "?" + u.RawQuery
	1150  	}
	1151  	return result
	1152  }
	1153  
	1154  // Hostname returns u.Host, stripping any valid port number if present.
	1155  //
	1156  // If the result is enclosed in square brackets, as literal IPv6 addresses are,
	1157  // the square brackets are removed from the result.
	1158  func (u *URL) Hostname() string {
	1159  	host, _ := splitHostPort(u.Host)
	1160  	return host
	1161  }
	1162  
	1163  // Port returns the port part of u.Host, without the leading colon.
	1164  //
	1165  // If u.Host doesn't contain a valid numeric port, Port returns an empty string.
	1166  func (u *URL) Port() string {
	1167  	_, port := splitHostPort(u.Host)
	1168  	return port
	1169  }
	1170  
	1171  // splitHostPort separates host and port. If the port is not valid, it returns
	1172  // the entire input as host, and it doesn't check the validity of the host.
	1173  // Unlike net.SplitHostPort, but per RFC 3986, it requires ports to be numeric.
	1174  func splitHostPort(hostPort string) (host, port string) {
	1175  	host = hostPort
	1176  
	1177  	colon := strings.LastIndexByte(host, ':')
	1178  	if colon != -1 && validOptionalPort(host[colon:]) {
	1179  		host, port = host[:colon], host[colon+1:]
	1180  	}
	1181  
	1182  	if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
	1183  		host = host[1 : len(host)-1]
	1184  	}
	1185  
	1186  	return
	1187  }
	1188  
	1189  // Marshaling interface implementations.
	1190  // Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs.
	1191  
	1192  func (u *URL) MarshalBinary() (text []byte, err error) {
	1193  	return []byte(u.String()), nil
	1194  }
	1195  
	1196  func (u *URL) UnmarshalBinary(text []byte) error {
	1197  	u1, err := Parse(string(text))
	1198  	if err != nil {
	1199  		return err
	1200  	}
	1201  	*u = *u1
	1202  	return nil
	1203  }
	1204  
	1205  // validUserinfo reports whether s is a valid userinfo string per RFC 3986
	1206  // Section 3.2.1:
	1207  //		 userinfo		= *( unreserved / pct-encoded / sub-delims / ":" )
	1208  //		 unreserved	= ALPHA / DIGIT / "-" / "." / "_" / "~"
	1209  //		 sub-delims	= "!" / "$" / "&" / "'" / "(" / ")"
	1210  //									 / "*" / "+" / "," / ";" / "="
	1211  //
	1212  // It doesn't validate pct-encoded. The caller does that via func unescape.
	1213  func validUserinfo(s string) bool {
	1214  	for _, r := range s {
	1215  		if 'A' <= r && r <= 'Z' {
	1216  			continue
	1217  		}
	1218  		if 'a' <= r && r <= 'z' {
	1219  			continue
	1220  		}
	1221  		if '0' <= r && r <= '9' {
	1222  			continue
	1223  		}
	1224  		switch r {
	1225  		case '-', '.', '_', ':', '~', '!', '$', '&', '\'',
	1226  			'(', ')', '*', '+', ',', ';', '=', '%', '@':
	1227  			continue
	1228  		default:
	1229  			return false
	1230  		}
	1231  	}
	1232  	return true
	1233  }
	1234  
	1235  // stringContainsCTLByte reports whether s contains any ASCII control character.
	1236  func stringContainsCTLByte(s string) bool {
	1237  	for i := 0; i < len(s); i++ {
	1238  		b := s[i]
	1239  		if b < ' ' || b == 0x7f {
	1240  			return true
	1241  		}
	1242  	}
	1243  	return false
	1244  }
	1245
View as plain text