...

Source file src/go/doc/comment.go

Documentation: go/doc

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Godoc comment extraction and comment -> HTML formatting.
		 6  
		 7  package doc
		 8  
		 9  import (
		10  	"bytes"
		11  	"internal/lazyregexp"
		12  	"io"
		13  	"strings"
		14  	"text/template" // for HTMLEscape
		15  	"unicode"
		16  	"unicode/utf8"
		17  )
		18  
		19  const (
		20  	ldquo = "“"
		21  	rdquo = "”"
		22  	ulquo = "“"
		23  	urquo = "”"
		24  )
		25  
		26  var (
		27  	htmlQuoteReplacer		= strings.NewReplacer(ulquo, ldquo, urquo, rdquo)
		28  	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
		29  )
		30  
		31  // Escape comment text for HTML. If nice is set,
		32  // also turn `` into “ and '' into ”.
		33  func commentEscape(w io.Writer, text string, nice bool) {
		34  	if nice {
		35  		// In the first pass, we convert `` and '' into their unicode equivalents.
		36  		// This prevents them from being escaped in HTMLEscape.
		37  		text = convertQuotes(text)
		38  		var buf bytes.Buffer
		39  		template.HTMLEscape(&buf, []byte(text))
		40  		// Now we convert the unicode quotes to their HTML escaped entities to maintain old behavior.
		41  		// We need to use a temp buffer to read the string back and do the conversion,
		42  		// otherwise HTMLEscape will escape & to &
		43  		htmlQuoteReplacer.WriteString(w, buf.String())
		44  		return
		45  	}
		46  	template.HTMLEscape(w, []byte(text))
		47  }
		48  
		49  func convertQuotes(text string) string {
		50  	return unicodeQuoteReplacer.Replace(text)
		51  }
		52  
		53  const (
		54  	// Regexp for Go identifiers
		55  	identRx = `[\pL_][\pL_0-9]*`
		56  
		57  	// Regexp for URLs
		58  	// Match parens, and check later for balance - see #5043, #22285
		59  	// Match .,:;?! within path, but not at end - see #18139, #16565
		60  	// This excludes some rare yet valid urls ending in common punctuation
		61  	// in order to allow sentences ending in URLs.
		62  
		63  	// protocol (required) e.g. http
		64  	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
		65  	// host (required) e.g. www.example.com or [::1]:8080
		66  	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
		67  	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
		68  	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
		69  
		70  	urlRx = protoPart + `://` + hostPart + pathPart
		71  )
		72  
		73  var matchRx = lazyregexp.New(`(` + urlRx + `)|(` + identRx + `)`)
		74  
		75  var (
		76  	html_a			= []byte(`<a href="`)
		77  	html_aq		 = []byte(`">`)
		78  	html_enda	 = []byte("</a>")
		79  	html_i			= []byte("<i>")
		80  	html_endi	 = []byte("</i>")
		81  	html_p			= []byte("<p>\n")
		82  	html_endp	 = []byte("</p>\n")
		83  	html_pre		= []byte("<pre>")
		84  	html_endpre = []byte("</pre>\n")
		85  	html_h			= []byte(`<h3 id="`)
		86  	html_hq		 = []byte(`">`)
		87  	html_endh	 = []byte("</h3>\n")
		88  )
		89  
		90  // Emphasize and escape a line of text for HTML. URLs are converted into links;
		91  // if the URL also appears in the words map, the link is taken from the map (if
		92  // the corresponding map value is the empty string, the URL is not converted
		93  // into a link). Go identifiers that appear in the words map are italicized; if
		94  // the corresponding map value is not the empty string, it is considered a URL
		95  // and the word is converted into a link. If nice is set, the remaining text's
		96  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
		97  // and '' into &rdquo;).
		98  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
		99  	for {
	 100  		m := matchRx.FindStringSubmatchIndex(line)
	 101  		if m == nil {
	 102  			break
	 103  		}
	 104  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
	 105  
	 106  		// write text before match
	 107  		commentEscape(w, line[0:m[0]], nice)
	 108  
	 109  		// adjust match for URLs
	 110  		match := line[m[0]:m[1]]
	 111  		if strings.Contains(match, "://") {
	 112  			m0, m1 := m[0], m[1]
	 113  			for _, s := range []string{"()", "{}", "[]"} {
	 114  				open, close := s[:1], s[1:] // E.g., "(" and ")"
	 115  				// require opening parentheses before closing parentheses (#22285)
	 116  				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
	 117  					m1 = m0 + i
	 118  					match = line[m0:m1]
	 119  				}
	 120  				// require balanced pairs of parentheses (#5043)
	 121  				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
	 122  					m1 = strings.LastIndexAny(line[:m1], s)
	 123  					match = line[m0:m1]
	 124  				}
	 125  			}
	 126  			if m1 != m[1] {
	 127  				// redo matching with shortened line for correct indices
	 128  				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
	 129  			}
	 130  		}
	 131  
	 132  		// analyze match
	 133  		url := ""
	 134  		italics := false
	 135  		if words != nil {
	 136  			url, italics = words[match]
	 137  		}
	 138  		if m[2] >= 0 {
	 139  			// match against first parenthesized sub-regexp; must be match against urlRx
	 140  			if !italics {
	 141  				// no alternative URL in words list, use match instead
	 142  				url = match
	 143  			}
	 144  			italics = false // don't italicize URLs
	 145  		}
	 146  
	 147  		// write match
	 148  		if len(url) > 0 {
	 149  			w.Write(html_a)
	 150  			template.HTMLEscape(w, []byte(url))
	 151  			w.Write(html_aq)
	 152  		}
	 153  		if italics {
	 154  			w.Write(html_i)
	 155  		}
	 156  		commentEscape(w, match, nice)
	 157  		if italics {
	 158  			w.Write(html_endi)
	 159  		}
	 160  		if len(url) > 0 {
	 161  			w.Write(html_enda)
	 162  		}
	 163  
	 164  		// advance
	 165  		line = line[m[1]:]
	 166  	}
	 167  	commentEscape(w, line, nice)
	 168  }
	 169  
	 170  func indentLen(s string) int {
	 171  	i := 0
	 172  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
	 173  		i++
	 174  	}
	 175  	return i
	 176  }
	 177  
	 178  func isBlank(s string) bool {
	 179  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
	 180  }
	 181  
	 182  func commonPrefix(a, b string) string {
	 183  	i := 0
	 184  	for i < len(a) && i < len(b) && a[i] == b[i] {
	 185  		i++
	 186  	}
	 187  	return a[0:i]
	 188  }
	 189  
	 190  func unindent(block []string) {
	 191  	if len(block) == 0 {
	 192  		return
	 193  	}
	 194  
	 195  	// compute maximum common white prefix
	 196  	prefix := block[0][0:indentLen(block[0])]
	 197  	for _, line := range block {
	 198  		if !isBlank(line) {
	 199  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
	 200  		}
	 201  	}
	 202  	n := len(prefix)
	 203  
	 204  	// remove
	 205  	for i, line := range block {
	 206  		if !isBlank(line) {
	 207  			block[i] = line[n:]
	 208  		}
	 209  	}
	 210  }
	 211  
	 212  // heading returns the trimmed line if it passes as a section heading;
	 213  // otherwise it returns the empty string.
	 214  func heading(line string) string {
	 215  	line = strings.TrimSpace(line)
	 216  	if len(line) == 0 {
	 217  		return ""
	 218  	}
	 219  
	 220  	// a heading must start with an uppercase letter
	 221  	r, _ := utf8.DecodeRuneInString(line)
	 222  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
	 223  		return ""
	 224  	}
	 225  
	 226  	// it must end in a letter or digit:
	 227  	r, _ = utf8.DecodeLastRuneInString(line)
	 228  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
	 229  		return ""
	 230  	}
	 231  
	 232  	// exclude lines with illegal characters. we allow "(),"
	 233  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
	 234  		return ""
	 235  	}
	 236  
	 237  	// allow "'" for possessive "'s" only
	 238  	for b := line; ; {
	 239  		i := strings.IndexRune(b, '\'')
	 240  		if i < 0 {
	 241  			break
	 242  		}
	 243  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
	 244  			return "" // not followed by "s "
	 245  		}
	 246  		b = b[i+2:]
	 247  	}
	 248  
	 249  	// allow "." when followed by non-space
	 250  	for b := line; ; {
	 251  		i := strings.IndexRune(b, '.')
	 252  		if i < 0 {
	 253  			break
	 254  		}
	 255  		if i+1 >= len(b) || b[i+1] == ' ' {
	 256  			return "" // not followed by non-space
	 257  		}
	 258  		b = b[i+1:]
	 259  	}
	 260  
	 261  	return line
	 262  }
	 263  
	 264  type op int
	 265  
	 266  const (
	 267  	opPara op = iota
	 268  	opHead
	 269  	opPre
	 270  )
	 271  
	 272  type block struct {
	 273  	op		op
	 274  	lines []string
	 275  }
	 276  
	 277  var nonAlphaNumRx = lazyregexp.New(`[^a-zA-Z0-9]`)
	 278  
	 279  func anchorID(line string) string {
	 280  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
	 281  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
	 282  }
	 283  
	 284  // ToHTML converts comment text to formatted HTML.
	 285  // The comment was prepared by DocReader,
	 286  // so it is known not to have leading, trailing blank lines
	 287  // nor to have trailing spaces at the end of lines.
	 288  // The comment markers have already been removed.
	 289  //
	 290  // Each span of unindented non-blank lines is converted into
	 291  // a single paragraph. There is one exception to the rule: a span that
	 292  // consists of a single line, is followed by another paragraph span,
	 293  // begins with a capital letter, and contains no punctuation
	 294  // other than parentheses and commas is formatted as a heading.
	 295  //
	 296  // A span of indented lines is converted into a <pre> block,
	 297  // with the common indent prefix removed.
	 298  //
	 299  // URLs in the comment text are converted into links; if the URL also appears
	 300  // in the words map, the link is taken from the map (if the corresponding map
	 301  // value is the empty string, the URL is not converted into a link).
	 302  //
	 303  // A pair of (consecutive) backticks (`) is converted to a unicode left quote (“), and a pair of (consecutive)
	 304  // single quotes (') is converted to a unicode right quote (”).
	 305  //
	 306  // Go identifiers that appear in the words map are italicized; if the corresponding
	 307  // map value is not the empty string, it is considered a URL and the word is converted
	 308  // into a link.
	 309  func ToHTML(w io.Writer, text string, words map[string]string) {
	 310  	for _, b := range blocks(text) {
	 311  		switch b.op {
	 312  		case opPara:
	 313  			w.Write(html_p)
	 314  			for _, line := range b.lines {
	 315  				emphasize(w, line, words, true)
	 316  			}
	 317  			w.Write(html_endp)
	 318  		case opHead:
	 319  			w.Write(html_h)
	 320  			id := ""
	 321  			for _, line := range b.lines {
	 322  				if id == "" {
	 323  					id = anchorID(line)
	 324  					w.Write([]byte(id))
	 325  					w.Write(html_hq)
	 326  				}
	 327  				commentEscape(w, line, true)
	 328  			}
	 329  			if id == "" {
	 330  				w.Write(html_hq)
	 331  			}
	 332  			w.Write(html_endh)
	 333  		case opPre:
	 334  			w.Write(html_pre)
	 335  			for _, line := range b.lines {
	 336  				emphasize(w, line, nil, false)
	 337  			}
	 338  			w.Write(html_endpre)
	 339  		}
	 340  	}
	 341  }
	 342  
	 343  func blocks(text string) []block {
	 344  	var (
	 345  		out	[]block
	 346  		para []string
	 347  
	 348  		lastWasBlank	 = false
	 349  		lastWasHeading = false
	 350  	)
	 351  
	 352  	close := func() {
	 353  		if para != nil {
	 354  			out = append(out, block{opPara, para})
	 355  			para = nil
	 356  		}
	 357  	}
	 358  
	 359  	lines := strings.SplitAfter(text, "\n")
	 360  	unindent(lines)
	 361  	for i := 0; i < len(lines); {
	 362  		line := lines[i]
	 363  		if isBlank(line) {
	 364  			// close paragraph
	 365  			close()
	 366  			i++
	 367  			lastWasBlank = true
	 368  			continue
	 369  		}
	 370  		if indentLen(line) > 0 {
	 371  			// close paragraph
	 372  			close()
	 373  
	 374  			// count indented or blank lines
	 375  			j := i + 1
	 376  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
	 377  				j++
	 378  			}
	 379  			// but not trailing blank lines
	 380  			for j > i && isBlank(lines[j-1]) {
	 381  				j--
	 382  			}
	 383  			pre := lines[i:j]
	 384  			i = j
	 385  
	 386  			unindent(pre)
	 387  
	 388  			// put those lines in a pre block
	 389  			out = append(out, block{opPre, pre})
	 390  			lastWasHeading = false
	 391  			continue
	 392  		}
	 393  
	 394  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
	 395  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
	 396  			// current line is non-blank, surrounded by blank lines
	 397  			// and the next non-blank line is not indented: this
	 398  			// might be a heading.
	 399  			if head := heading(line); head != "" {
	 400  				close()
	 401  				out = append(out, block{opHead, []string{head}})
	 402  				i += 2
	 403  				lastWasHeading = true
	 404  				continue
	 405  			}
	 406  		}
	 407  
	 408  		// open paragraph
	 409  		lastWasBlank = false
	 410  		lastWasHeading = false
	 411  		para = append(para, lines[i])
	 412  		i++
	 413  	}
	 414  	close()
	 415  
	 416  	return out
	 417  }
	 418  
	 419  // ToText prepares comment text for presentation in textual output.
	 420  // It wraps paragraphs of text to width or fewer Unicode code points
	 421  // and then prefixes each line with the indent. In preformatted sections
	 422  // (such as program text), it prefixes each non-blank line with preIndent.
	 423  //
	 424  // A pair of (consecutive) backticks (`) is converted to a unicode left quote (“), and a pair of (consecutive)
	 425  // single quotes (') is converted to a unicode right quote (”).
	 426  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
	 427  	l := lineWrapper{
	 428  		out:		w,
	 429  		width:	width,
	 430  		indent: indent,
	 431  	}
	 432  	for _, b := range blocks(text) {
	 433  		switch b.op {
	 434  		case opPara:
	 435  			// l.write will add leading newline if required
	 436  			for _, line := range b.lines {
	 437  				line = convertQuotes(line)
	 438  				l.write(line)
	 439  			}
	 440  			l.flush()
	 441  		case opHead:
	 442  			w.Write(nl)
	 443  			for _, line := range b.lines {
	 444  				line = convertQuotes(line)
	 445  				l.write(line + "\n")
	 446  			}
	 447  			l.flush()
	 448  		case opPre:
	 449  			w.Write(nl)
	 450  			for _, line := range b.lines {
	 451  				if isBlank(line) {
	 452  					w.Write([]byte("\n"))
	 453  				} else {
	 454  					w.Write([]byte(preIndent))
	 455  					w.Write([]byte(line))
	 456  				}
	 457  			}
	 458  		}
	 459  	}
	 460  }
	 461  
	 462  type lineWrapper struct {
	 463  	out			 io.Writer
	 464  	printed	 bool
	 465  	width		 int
	 466  	indent		string
	 467  	n				 int
	 468  	pendSpace int
	 469  }
	 470  
	 471  var nl = []byte("\n")
	 472  var space = []byte(" ")
	 473  var prefix = []byte("// ")
	 474  
	 475  func (l *lineWrapper) write(text string) {
	 476  	if l.n == 0 && l.printed {
	 477  		l.out.Write(nl) // blank line before new paragraph
	 478  	}
	 479  	l.printed = true
	 480  
	 481  	needsPrefix := false
	 482  	isComment := strings.HasPrefix(text, "//")
	 483  	for _, f := range strings.Fields(text) {
	 484  		w := utf8.RuneCountInString(f)
	 485  		// wrap if line is too long
	 486  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
	 487  			l.out.Write(nl)
	 488  			l.n = 0
	 489  			l.pendSpace = 0
	 490  			needsPrefix = isComment && !strings.HasPrefix(f, "//")
	 491  		}
	 492  		if l.n == 0 {
	 493  			l.out.Write([]byte(l.indent))
	 494  		}
	 495  		if needsPrefix {
	 496  			l.out.Write(prefix)
	 497  			needsPrefix = false
	 498  		}
	 499  		l.out.Write(space[:l.pendSpace])
	 500  		l.out.Write([]byte(f))
	 501  		l.n += l.pendSpace + w
	 502  		l.pendSpace = 1
	 503  	}
	 504  }
	 505  
	 506  func (l *lineWrapper) flush() {
	 507  	if l.n == 0 {
	 508  		return
	 509  	}
	 510  	l.out.Write(nl)
	 511  	l.pendSpace = 0
	 512  	l.n = 0
	 513  }
	 514  

View as plain text