transition.go

Documentation: html/template

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package template
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"strings"
		10  )
		11  
		12  // transitionFunc is the array of context transition functions for text nodes.
		13  // A transition function takes a context and template text input, and returns
		14  // the updated context and the number of bytes consumed from the front of the
		15  // input.
		16  var transitionFunc = [...]func(context, []byte) (context, int){
		17  	stateText:				tText,
		18  	stateTag:				 tTag,
		19  	stateAttrName:		tAttrName,
		20  	stateAfterName:	 tAfterName,
		21  	stateBeforeValue: tBeforeValue,
		22  	stateHTMLCmt:		 tHTMLCmt,
		23  	stateRCDATA:			tSpecialTagEnd,
		24  	stateAttr:				tAttr,
		25  	stateURL:				 tURL,
		26  	stateSrcset:			tURL,
		27  	stateJS:					tJS,
		28  	stateJSDqStr:		 tJSDelimited,
		29  	stateJSSqStr:		 tJSDelimited,
		30  	stateJSRegexp:		tJSDelimited,
		31  	stateJSBlockCmt:	tBlockCmt,
		32  	stateJSLineCmt:	 tLineCmt,
		33  	stateCSS:				 tCSS,
		34  	stateCSSDqStr:		tCSSStr,
		35  	stateCSSSqStr:		tCSSStr,
		36  	stateCSSDqURL:		tCSSStr,
		37  	stateCSSSqURL:		tCSSStr,
		38  	stateCSSURL:			tCSSStr,
		39  	stateCSSBlockCmt: tBlockCmt,
		40  	stateCSSLineCmt:	tLineCmt,
		41  	stateError:			 tError,
		42  }
		43  
		44  var commentStart = []byte("<!--")
		45  var commentEnd = []byte("-->")
		46  
		47  // tText is the context transition function for the text state.
		48  func tText(c context, s []byte) (context, int) {
		49  	k := 0
		50  	for {
		51  		i := k + bytes.IndexByte(s[k:], '<')
		52  		if i < k || i+1 == len(s) {
		53  			return c, len(s)
		54  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
		55  			return context{state: stateHTMLCmt}, i + 4
		56  		}
		57  		i++
		58  		end := false
		59  		if s[i] == '/' {
		60  			if i+1 == len(s) {
		61  				return c, len(s)
		62  			}
		63  			end, i = true, i+1
		64  		}
		65  		j, e := eatTagName(s, i)
		66  		if j != i {
		67  			if end {
		68  				e = elementNone
		69  			}
		70  			// We've found an HTML tag.
		71  			return context{state: stateTag, element: e}, j
		72  		}
		73  		k = j
		74  	}
		75  }
		76  
		77  var elementContentType = [...]state{
		78  	elementNone:		 stateText,
		79  	elementScript:	 stateJS,
		80  	elementStyle:		stateCSS,
		81  	elementTextarea: stateRCDATA,
		82  	elementTitle:		stateRCDATA,
		83  }
		84  
		85  // tTag is the context transition function for the tag state.
		86  func tTag(c context, s []byte) (context, int) {
		87  	// Find the attribute name.
		88  	i := eatWhiteSpace(s, 0)
		89  	if i == len(s) {
		90  		return c, len(s)
		91  	}
		92  	if s[i] == '>' {
		93  		return context{
		94  			state:	 elementContentType[c.element],
		95  			element: c.element,
		96  		}, i + 1
		97  	}
		98  	j, err := eatAttrName(s, i)
		99  	if err != nil {
	 100  		return context{state: stateError, err: err}, len(s)
	 101  	}
	 102  	state, attr := stateTag, attrNone
	 103  	if i == j {
	 104  		return context{
	 105  			state: stateError,
	 106  			err:	 errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
	 107  		}, len(s)
	 108  	}
	 109  
	 110  	attrName := strings.ToLower(string(s[i:j]))
	 111  	if c.element == elementScript && attrName == "type" {
	 112  		attr = attrScriptType
	 113  	} else {
	 114  		switch attrType(attrName) {
	 115  		case contentTypeURL:
	 116  			attr = attrURL
	 117  		case contentTypeCSS:
	 118  			attr = attrStyle
	 119  		case contentTypeJS:
	 120  			attr = attrScript
	 121  		case contentTypeSrcset:
	 122  			attr = attrSrcset
	 123  		}
	 124  	}
	 125  
	 126  	if j == len(s) {
	 127  		state = stateAttrName
	 128  	} else {
	 129  		state = stateAfterName
	 130  	}
	 131  	return context{state: state, element: c.element, attr: attr}, j
	 132  }
	 133  
	 134  // tAttrName is the context transition function for stateAttrName.
	 135  func tAttrName(c context, s []byte) (context, int) {
	 136  	i, err := eatAttrName(s, 0)
	 137  	if err != nil {
	 138  		return context{state: stateError, err: err}, len(s)
	 139  	} else if i != len(s) {
	 140  		c.state = stateAfterName
	 141  	}
	 142  	return c, i
	 143  }
	 144  
	 145  // tAfterName is the context transition function for stateAfterName.
	 146  func tAfterName(c context, s []byte) (context, int) {
	 147  	// Look for the start of the value.
	 148  	i := eatWhiteSpace(s, 0)
	 149  	if i == len(s) {
	 150  		return c, len(s)
	 151  	} else if s[i] != '=' {
	 152  		// Occurs due to tag ending '>', and valueless attribute.
	 153  		c.state = stateTag
	 154  		return c, i
	 155  	}
	 156  	c.state = stateBeforeValue
	 157  	// Consume the "=".
	 158  	return c, i + 1
	 159  }
	 160  
	 161  var attrStartStates = [...]state{
	 162  	attrNone:			 stateAttr,
	 163  	attrScript:		 stateJS,
	 164  	attrScriptType: stateAttr,
	 165  	attrStyle:			stateCSS,
	 166  	attrURL:				stateURL,
	 167  	attrSrcset:		 stateSrcset,
	 168  }
	 169  
	 170  // tBeforeValue is the context transition function for stateBeforeValue.
	 171  func tBeforeValue(c context, s []byte) (context, int) {
	 172  	i := eatWhiteSpace(s, 0)
	 173  	if i == len(s) {
	 174  		return c, len(s)
	 175  	}
	 176  	// Find the attribute delimiter.
	 177  	delim := delimSpaceOrTagEnd
	 178  	switch s[i] {
	 179  	case '\'':
	 180  		delim, i = delimSingleQuote, i+1
	 181  	case '"':
	 182  		delim, i = delimDoubleQuote, i+1
	 183  	}
	 184  	c.state, c.delim = attrStartStates[c.attr], delim
	 185  	return c, i
	 186  }
	 187  
	 188  // tHTMLCmt is the context transition function for stateHTMLCmt.
	 189  func tHTMLCmt(c context, s []byte) (context, int) {
	 190  	if i := bytes.Index(s, commentEnd); i != -1 {
	 191  		return context{}, i + 3
	 192  	}
	 193  	return c, len(s)
	 194  }
	 195  
	 196  // specialTagEndMarkers maps element types to the character sequence that
	 197  // case-insensitively signals the end of the special tag body.
	 198  var specialTagEndMarkers = [...][]byte{
	 199  	elementScript:	 []byte("script"),
	 200  	elementStyle:		[]byte("style"),
	 201  	elementTextarea: []byte("textarea"),
	 202  	elementTitle:		[]byte("title"),
	 203  }
	 204  
	 205  var (
	 206  	specialTagEndPrefix = []byte("</")
	 207  	tagEndSeparators		= []byte("> \t\n\f/")
	 208  )
	 209  
	 210  // tSpecialTagEnd is the context transition function for raw text and RCDATA
	 211  // element states.
	 212  func tSpecialTagEnd(c context, s []byte) (context, int) {
	 213  	if c.element != elementNone {
	 214  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
	 215  			return context{}, i
	 216  		}
	 217  	}
	 218  	return c, len(s)
	 219  }
	 220  
	 221  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
	 222  func indexTagEnd(s []byte, tag []byte) int {
	 223  	res := 0
	 224  	plen := len(specialTagEndPrefix)
	 225  	for len(s) > 0 {
	 226  		// Try to find the tag end prefix first
	 227  		i := bytes.Index(s, specialTagEndPrefix)
	 228  		if i == -1 {
	 229  			return i
	 230  		}
	 231  		s = s[i+plen:]
	 232  		// Try to match the actual tag if there is still space for it
	 233  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
	 234  			s = s[len(tag):]
	 235  			// Check the tag is followed by a proper separator
	 236  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
	 237  				return res + i
	 238  			}
	 239  			res += len(tag)
	 240  		}
	 241  		res += i + plen
	 242  	}
	 243  	return -1
	 244  }
	 245  
	 246  // tAttr is the context transition function for the attribute state.
	 247  func tAttr(c context, s []byte) (context, int) {
	 248  	return c, len(s)
	 249  }
	 250  
	 251  // tURL is the context transition function for the URL state.
	 252  func tURL(c context, s []byte) (context, int) {
	 253  	if bytes.ContainsAny(s, "#?") {
	 254  		c.urlPart = urlPartQueryOrFrag
	 255  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
	 256  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
	 257  		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
	 258  		c.urlPart = urlPartPreQuery
	 259  	}
	 260  	return c, len(s)
	 261  }
	 262  
	 263  // tJS is the context transition function for the JS state.
	 264  func tJS(c context, s []byte) (context, int) {
	 265  	i := bytes.IndexAny(s, `"'/`)
	 266  	if i == -1 {
	 267  		// Entire input is non string, comment, regexp tokens.
	 268  		c.jsCtx = nextJSCtx(s, c.jsCtx)
	 269  		return c, len(s)
	 270  	}
	 271  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
	 272  	switch s[i] {
	 273  	case '"':
	 274  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
	 275  	case '\'':
	 276  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
	 277  	case '/':
	 278  		switch {
	 279  		case i+1 < len(s) && s[i+1] == '/':
	 280  			c.state, i = stateJSLineCmt, i+1
	 281  		case i+1 < len(s) && s[i+1] == '*':
	 282  			c.state, i = stateJSBlockCmt, i+1
	 283  		case c.jsCtx == jsCtxRegexp:
	 284  			c.state = stateJSRegexp
	 285  		case c.jsCtx == jsCtxDivOp:
	 286  			c.jsCtx = jsCtxRegexp
	 287  		default:
	 288  			return context{
	 289  				state: stateError,
	 290  				err:	 errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
	 291  			}, len(s)
	 292  		}
	 293  	default:
	 294  		panic("unreachable")
	 295  	}
	 296  	return c, i + 1
	 297  }
	 298  
	 299  // tJSDelimited is the context transition function for the JS string and regexp
	 300  // states.
	 301  func tJSDelimited(c context, s []byte) (context, int) {
	 302  	specials := `\"`
	 303  	switch c.state {
	 304  	case stateJSSqStr:
	 305  		specials = `\'`
	 306  	case stateJSRegexp:
	 307  		specials = `\/[]`
	 308  	}
	 309  
	 310  	k, inCharset := 0, false
	 311  	for {
	 312  		i := k + bytes.IndexAny(s[k:], specials)
	 313  		if i < k {
	 314  			break
	 315  		}
	 316  		switch s[i] {
	 317  		case '\\':
	 318  			i++
	 319  			if i == len(s) {
	 320  				return context{
	 321  					state: stateError,
	 322  					err:	 errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
	 323  				}, len(s)
	 324  			}
	 325  		case '[':
	 326  			inCharset = true
	 327  		case ']':
	 328  			inCharset = false
	 329  		default:
	 330  			// end delimiter
	 331  			if !inCharset {
	 332  				c.state, c.jsCtx = stateJS, jsCtxDivOp
	 333  				return c, i + 1
	 334  			}
	 335  		}
	 336  		k = i + 1
	 337  	}
	 338  
	 339  	if inCharset {
	 340  		// This can be fixed by making context richer if interpolation
	 341  		// into charsets is desired.
	 342  		return context{
	 343  			state: stateError,
	 344  			err:	 errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
	 345  		}, len(s)
	 346  	}
	 347  
	 348  	return c, len(s)
	 349  }
	 350  
	 351  var blockCommentEnd = []byte("*/")
	 352  
	 353  // tBlockCmt is the context transition function for /*comment*/ states.
	 354  func tBlockCmt(c context, s []byte) (context, int) {
	 355  	i := bytes.Index(s, blockCommentEnd)
	 356  	if i == -1 {
	 357  		return c, len(s)
	 358  	}
	 359  	switch c.state {
	 360  	case stateJSBlockCmt:
	 361  		c.state = stateJS
	 362  	case stateCSSBlockCmt:
	 363  		c.state = stateCSS
	 364  	default:
	 365  		panic(c.state.String())
	 366  	}
	 367  	return c, i + 2
	 368  }
	 369  
	 370  // tLineCmt is the context transition function for //comment states.
	 371  func tLineCmt(c context, s []byte) (context, int) {
	 372  	var lineTerminators string
	 373  	var endState state
	 374  	switch c.state {
	 375  	case stateJSLineCmt:
	 376  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
	 377  	case stateCSSLineCmt:
	 378  		lineTerminators, endState = "\n\f\r", stateCSS
	 379  		// Line comments are not part of any published CSS standard but
	 380  		// are supported by the 4 major browsers.
	 381  		// This defines line comments as
	 382  		//		 LINECOMMENT ::= "//" [^\n\f\d]*
	 383  		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
	 384  		// newlines:
	 385  		//		 nl ::= #xA | #xD #xA | #xD | #xC
	 386  	default:
	 387  		panic(c.state.String())
	 388  	}
	 389  
	 390  	i := bytes.IndexAny(s, lineTerminators)
	 391  	if i == -1 {
	 392  		return c, len(s)
	 393  	}
	 394  	c.state = endState
	 395  	// Per section 7.4 of EcmaScript 5 : https://es5.github.com/#x7.4
	 396  	// "However, the LineTerminator at the end of the line is not
	 397  	// considered to be part of the single-line comment; it is
	 398  	// recognized separately by the lexical grammar and becomes part
	 399  	// of the stream of input elements for the syntactic grammar."
	 400  	return c, i
	 401  }
	 402  
	 403  // tCSS is the context transition function for the CSS state.
	 404  func tCSS(c context, s []byte) (context, int) {
	 405  	// CSS quoted strings are almost never used except for:
	 406  	// (1) URLs as in background: "/foo.png"
	 407  	// (2) Multiword font-names as in font-family: "Times New Roman"
	 408  	// (3) List separators in content values as in inline-lists:
	 409  	//		<style>
	 410  	//		ul.inlineList { list-style: none; padding:0 }
	 411  	//		ul.inlineList > li { display: inline }
	 412  	//		ul.inlineList > li:before { content: ", " }
	 413  	//		ul.inlineList > li:first-child:before { content: "" }
	 414  	//		</style>
	 415  	//		<ul class=inlineList><li>One<li>Two<li>Three</ul>
	 416  	// (4) Attribute value selectors as in a[href="http://example.com/"]
	 417  	//
	 418  	// We conservatively treat all strings as URLs, but make some
	 419  	// allowances to avoid confusion.
	 420  	//
	 421  	// In (1), our conservative assumption is justified.
	 422  	// In (2), valid font names do not contain ':', '?', or '#', so our
	 423  	// conservative assumption is fine since we will never transition past
	 424  	// urlPartPreQuery.
	 425  	// In (3), our protocol heuristic should not be tripped, and there
	 426  	// should not be non-space content after a '?' or '#', so as long as
	 427  	// we only %-encode RFC 3986 reserved characters we are ok.
	 428  	// In (4), we should URL escape for URL attributes, and for others we
	 429  	// have the attribute name available if our conservative assumption
	 430  	// proves problematic for real code.
	 431  
	 432  	k := 0
	 433  	for {
	 434  		i := k + bytes.IndexAny(s[k:], `("'/`)
	 435  		if i < k {
	 436  			return c, len(s)
	 437  		}
	 438  		switch s[i] {
	 439  		case '(':
	 440  			// Look for url to the left.
	 441  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
	 442  			if endsWithCSSKeyword(p, "url") {
	 443  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
	 444  				switch {
	 445  				case j != len(s) && s[j] == '"':
	 446  					c.state, j = stateCSSDqURL, j+1
	 447  				case j != len(s) && s[j] == '\'':
	 448  					c.state, j = stateCSSSqURL, j+1
	 449  				default:
	 450  					c.state = stateCSSURL
	 451  				}
	 452  				return c, j
	 453  			}
	 454  		case '/':
	 455  			if i+1 < len(s) {
	 456  				switch s[i+1] {
	 457  				case '/':
	 458  					c.state = stateCSSLineCmt
	 459  					return c, i + 2
	 460  				case '*':
	 461  					c.state = stateCSSBlockCmt
	 462  					return c, i + 2
	 463  				}
	 464  			}
	 465  		case '"':
	 466  			c.state = stateCSSDqStr
	 467  			return c, i + 1
	 468  		case '\'':
	 469  			c.state = stateCSSSqStr
	 470  			return c, i + 1
	 471  		}
	 472  		k = i + 1
	 473  	}
	 474  }
	 475  
	 476  // tCSSStr is the context transition function for the CSS string and URL states.
	 477  func tCSSStr(c context, s []byte) (context, int) {
	 478  	var endAndEsc string
	 479  	switch c.state {
	 480  	case stateCSSDqStr, stateCSSDqURL:
	 481  		endAndEsc = `\"`
	 482  	case stateCSSSqStr, stateCSSSqURL:
	 483  		endAndEsc = `\'`
	 484  	case stateCSSURL:
	 485  		// Unquoted URLs end with a newline or close parenthesis.
	 486  		// The below includes the wc (whitespace character) and nl.
	 487  		endAndEsc = "\\\t\n\f\r )"
	 488  	default:
	 489  		panic(c.state.String())
	 490  	}
	 491  
	 492  	k := 0
	 493  	for {
	 494  		i := k + bytes.IndexAny(s[k:], endAndEsc)
	 495  		if i < k {
	 496  			c, nread := tURL(c, decodeCSS(s[k:]))
	 497  			return c, k + nread
	 498  		}
	 499  		if s[i] == '\\' {
	 500  			i++
	 501  			if i == len(s) {
	 502  				return context{
	 503  					state: stateError,
	 504  					err:	 errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
	 505  				}, len(s)
	 506  			}
	 507  		} else {
	 508  			c.state = stateCSS
	 509  			return c, i + 1
	 510  		}
	 511  		c, _ = tURL(c, decodeCSS(s[:i+1]))
	 512  		k = i + 1
	 513  	}
	 514  }
	 515  
	 516  // tError is the context transition function for the error state.
	 517  func tError(c context, s []byte) (context, int) {
	 518  	return c, len(s)
	 519  }
	 520  
	 521  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
	 522  // It returns an error if s[i:] does not look like it begins with an
	 523  // attribute name, such as encountering a quote mark without a preceding
	 524  // equals sign.
	 525  func eatAttrName(s []byte, i int) (int, *Error) {
	 526  	for j := i; j < len(s); j++ {
	 527  		switch s[j] {
	 528  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
	 529  			return j, nil
	 530  		case '\'', '"', '<':
	 531  			// These result in a parse warning in HTML5 and are
	 532  			// indicative of serious problems if seen in an attr
	 533  			// name in a template.
	 534  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
	 535  		default:
	 536  			// No-op.
	 537  		}
	 538  	}
	 539  	return len(s), nil
	 540  }
	 541  
	 542  var elementNameMap = map[string]element{
	 543  	"script":	 elementScript,
	 544  	"style":		elementStyle,
	 545  	"textarea": elementTextarea,
	 546  	"title":		elementTitle,
	 547  }
	 548  
	 549  // asciiAlpha reports whether c is an ASCII letter.
	 550  func asciiAlpha(c byte) bool {
	 551  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
	 552  }
	 553  
	 554  // asciiAlphaNum reports whether c is an ASCII letter or digit.
	 555  func asciiAlphaNum(c byte) bool {
	 556  	return asciiAlpha(c) || '0' <= c && c <= '9'
	 557  }
	 558  
	 559  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
	 560  func eatTagName(s []byte, i int) (int, element) {
	 561  	if i == len(s) || !asciiAlpha(s[i]) {
	 562  		return i, elementNone
	 563  	}
	 564  	j := i + 1
	 565  	for j < len(s) {
	 566  		x := s[j]
	 567  		if asciiAlphaNum(x) {
	 568  			j++
	 569  			continue
	 570  		}
	 571  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
	 572  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
	 573  			j += 2
	 574  			continue
	 575  		}
	 576  		break
	 577  	}
	 578  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
	 579  }
	 580  
	 581  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
	 582  func eatWhiteSpace(s []byte, i int) int {
	 583  	for j := i; j < len(s); j++ {
	 584  		switch s[j] {
	 585  		case ' ', '\t', '\n', '\f', '\r':
	 586  			// No-op.
	 587  		default:
	 588  			return j
	 589  		}
	 590  	}
	 591  	return len(s)
	 592  }
	 593
View as plain text