html.go

Documentation: html/template

		 1  // Copyright 2011 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package template
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"fmt"
		10  	"strings"
		11  	"unicode/utf8"
		12  )
		13  
		14  // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
		15  func htmlNospaceEscaper(args ...interface{}) string {
		16  	s, t := stringify(args...)
		17  	if t == contentTypeHTML {
		18  		return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
		19  	}
		20  	return htmlReplacer(s, htmlNospaceReplacementTable, false)
		21  }
		22  
		23  // attrEscaper escapes for inclusion in quoted attribute values.
		24  func attrEscaper(args ...interface{}) string {
		25  	s, t := stringify(args...)
		26  	if t == contentTypeHTML {
		27  		return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
		28  	}
		29  	return htmlReplacer(s, htmlReplacementTable, true)
		30  }
		31  
		32  // rcdataEscaper escapes for inclusion in an RCDATA element body.
		33  func rcdataEscaper(args ...interface{}) string {
		34  	s, t := stringify(args...)
		35  	if t == contentTypeHTML {
		36  		return htmlReplacer(s, htmlNormReplacementTable, true)
		37  	}
		38  	return htmlReplacer(s, htmlReplacementTable, true)
		39  }
		40  
		41  // htmlEscaper escapes for inclusion in HTML text.
		42  func htmlEscaper(args ...interface{}) string {
		43  	s, t := stringify(args...)
		44  	if t == contentTypeHTML {
		45  		return s
		46  	}
		47  	return htmlReplacer(s, htmlReplacementTable, true)
		48  }
		49  
		50  // htmlReplacementTable contains the runes that need to be escaped
		51  // inside a quoted attribute value or in a text node.
		52  var htmlReplacementTable = []string{
		53  	// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
		54  	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
		55  	// CHARACTER character to the current attribute's value.
		56  	// "
		57  	// and similarly
		58  	// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
		59  	0:		"\uFFFD",
		60  	'"':	"&#34;",
		61  	'&':	"&amp;",
		62  	'\'': "&#39;",
		63  	'+':	"&#43;",
		64  	'<':	"&lt;",
		65  	'>':	"&gt;",
		66  }
		67  
		68  // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
		69  // avoid over-encoding existing entities.
		70  var htmlNormReplacementTable = []string{
		71  	0:		"\uFFFD",
		72  	'"':	"&#34;",
		73  	'\'': "&#39;",
		74  	'+':	"&#43;",
		75  	'<':	"&lt;",
		76  	'>':	"&gt;",
		77  }
		78  
		79  // htmlNospaceReplacementTable contains the runes that need to be escaped
		80  // inside an unquoted attribute value.
		81  // The set of runes escaped is the union of the HTML specials and
		82  // those determined by running the JS below in browsers:
		83  // <div id=d></div>
		84  // <script>(function () {
		85  // var a = [], d = document.getElementById("d"), i, c, s;
		86  // for (i = 0; i < 0x10000; ++i) {
		87  //	 c = String.fromCharCode(i);
		88  //	 d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
		89  //	 s = d.getElementsByTagName("SPAN")[0];
		90  //	 if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
		91  // }
		92  // document.write(a.join(", "));
		93  // })()</script>
		94  var htmlNospaceReplacementTable = []string{
		95  	0:		"&#xfffd;",
		96  	'\t': "&#9;",
		97  	'\n': "&#10;",
		98  	'\v': "&#11;",
		99  	'\f': "&#12;",
	 100  	'\r': "&#13;",
	 101  	' ':	"&#32;",
	 102  	'"':	"&#34;",
	 103  	'&':	"&amp;",
	 104  	'\'': "&#39;",
	 105  	'+':	"&#43;",
	 106  	'<':	"&lt;",
	 107  	'=':	"&#61;",
	 108  	'>':	"&gt;",
	 109  	// A parse error in the attribute value (unquoted) and
	 110  	// before attribute value states.
	 111  	// Treated as a quoting character by IE.
	 112  	'`': "&#96;",
	 113  }
	 114  
	 115  // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
	 116  // without '&' to avoid over-encoding existing entities.
	 117  var htmlNospaceNormReplacementTable = []string{
	 118  	0:		"&#xfffd;",
	 119  	'\t': "&#9;",
	 120  	'\n': "&#10;",
	 121  	'\v': "&#11;",
	 122  	'\f': "&#12;",
	 123  	'\r': "&#13;",
	 124  	' ':	"&#32;",
	 125  	'"':	"&#34;",
	 126  	'\'': "&#39;",
	 127  	'+':	"&#43;",
	 128  	'<':	"&lt;",
	 129  	'=':	"&#61;",
	 130  	'>':	"&gt;",
	 131  	// A parse error in the attribute value (unquoted) and
	 132  	// before attribute value states.
	 133  	// Treated as a quoting character by IE.
	 134  	'`': "&#96;",
	 135  }
	 136  
	 137  // htmlReplacer returns s with runes replaced according to replacementTable
	 138  // and when badRunes is true, certain bad runes are allowed through unescaped.
	 139  func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	 140  	written, b := 0, new(strings.Builder)
	 141  	r, w := rune(0), 0
	 142  	for i := 0; i < len(s); i += w {
	 143  		// Cannot use 'for range s' because we need to preserve the width
	 144  		// of the runes in the input. If we see a decoding error, the input
	 145  		// width will not be utf8.Runelen(r) and we will overrun the buffer.
	 146  		r, w = utf8.DecodeRuneInString(s[i:])
	 147  		if int(r) < len(replacementTable) {
	 148  			if repl := replacementTable[r]; len(repl) != 0 {
	 149  				if written == 0 {
	 150  					b.Grow(len(s))
	 151  				}
	 152  				b.WriteString(s[written:i])
	 153  				b.WriteString(repl)
	 154  				written = i + w
	 155  			}
	 156  		} else if badRunes {
	 157  			// No-op.
	 158  			// IE does not allow these ranges in unquoted attrs.
	 159  		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
	 160  			if written == 0 {
	 161  				b.Grow(len(s))
	 162  			}
	 163  			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
	 164  			written = i + w
	 165  		}
	 166  	}
	 167  	if written == 0 {
	 168  		return s
	 169  	}
	 170  	b.WriteString(s[written:])
	 171  	return b.String()
	 172  }
	 173  
	 174  // stripTags takes a snippet of HTML and returns only the text content.
	 175  // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
	 176  func stripTags(html string) string {
	 177  	var b bytes.Buffer
	 178  	s, c, i, allText := []byte(html), context{}, 0, true
	 179  	// Using the transition funcs helps us avoid mangling
	 180  	// `<div title="1>2">` or `I <3 Ponies!`.
	 181  	for i != len(s) {
	 182  		if c.delim == delimNone {
	 183  			st := c.state
	 184  			// Use RCDATA instead of parsing into JS or CSS styles.
	 185  			if c.element != elementNone && !isInTag(st) {
	 186  				st = stateRCDATA
	 187  			}
	 188  			d, nread := transitionFunc[st](c, s[i:])
	 189  			i1 := i + nread
	 190  			if c.state == stateText || c.state == stateRCDATA {
	 191  				// Emit text up to the start of the tag or comment.
	 192  				j := i1
	 193  				if d.state != c.state {
	 194  					for j1 := j - 1; j1 >= i; j1-- {
	 195  						if s[j1] == '<' {
	 196  							j = j1
	 197  							break
	 198  						}
	 199  					}
	 200  				}
	 201  				b.Write(s[i:j])
	 202  			} else {
	 203  				allText = false
	 204  			}
	 205  			c, i = d, i1
	 206  			continue
	 207  		}
	 208  		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
	 209  		if i1 < i {
	 210  			break
	 211  		}
	 212  		if c.delim != delimSpaceOrTagEnd {
	 213  			// Consume any quote.
	 214  			i1++
	 215  		}
	 216  		c, i = context{state: stateTag, element: c.element}, i1
	 217  	}
	 218  	if allText {
	 219  		return html
	 220  	} else if c.state == stateText || c.state == stateRCDATA {
	 221  		b.Write(s[i:])
	 222  	}
	 223  	return b.String()
	 224  }
	 225  
	 226  // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
	 227  // a known-safe HTML attribute.
	 228  func htmlNameFilter(args ...interface{}) string {
	 229  	s, t := stringify(args...)
	 230  	if t == contentTypeHTMLAttr {
	 231  		return s
	 232  	}
	 233  	if len(s) == 0 {
	 234  		// Avoid violation of structure preservation.
	 235  		// <input checked {{.K}}={{.V}}>.
	 236  		// Without this, if .K is empty then .V is the value of
	 237  		// checked, but otherwise .V is the value of the attribute
	 238  		// named .K.
	 239  		return filterFailsafe
	 240  	}
	 241  	s = strings.ToLower(s)
	 242  	if t := attrType(s); t != contentTypePlain {
	 243  		// TODO: Split attr and element name part filters so we can recognize known attributes.
	 244  		return filterFailsafe
	 245  	}
	 246  	for _, r := range s {
	 247  		switch {
	 248  		case '0' <= r && r <= '9':
	 249  		case 'a' <= r && r <= 'z':
	 250  		default:
	 251  			return filterFailsafe
	 252  		}
	 253  	}
	 254  	return s
	 255  }
	 256  
	 257  // commentEscaper returns the empty string regardless of input.
	 258  // Comment content does not correspond to any parsed structure or
	 259  // human-readable content, so the simplest and most secure policy is to drop
	 260  // content interpolated into comments.
	 261  // This approach is equally valid whether or not static comment content is
	 262  // removed from the template.
	 263  func commentEscaper(args ...interface{}) string {
	 264  	return ""
	 265  }
	 266
View as plain text