...

Source file src/text/tabwriter/tabwriter.go

Documentation: text/tabwriter

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  // Package tabwriter implements a write filter (tabwriter.Writer) that
		 6  // translates tabbed columns in input into properly aligned text.
		 7  //
		 8  // The package is using the Elastic Tabstops algorithm described at
		 9  // http://nickgravgaard.com/elastictabstops/index.html.
		10  //
		11  // The text/tabwriter package is frozen and is not accepting new features.
		12  package tabwriter
		13  
import (
	"fmt"
	"io"
	"unicode/utf8"
)
		18  
		19  // ----------------------------------------------------------------------------
		20  // Filter implementation
		21  
// A cell represents a segment of text terminated by tabs or line breaks.
// The text itself is stored in a separate buffer; cell only describes the
// segment's size in bytes, its width in runes, and whether it's an htab
// ('\t') terminated cell.
type cell struct {
	size  int  // cell size in bytes
	width int  // cell width in runes
	htab  bool // true if the cell is terminated by an htab ('\t')
}
		32  
// A Writer is a filter that inserts padding around tab-delimited
// columns in its input to align them in the output.
//
// The Writer treats incoming bytes as UTF-8-encoded text consisting
// of cells terminated by horizontal ('\t') or vertical ('\v') tabs,
// and newline ('\n') or formfeed ('\f') characters; both newline and
// formfeed act as line breaks.
//
// Tab-terminated cells in contiguous lines constitute a column. The
// Writer inserts padding as needed to make all cells in a column have
// the same width, effectively aligning the columns. It assumes that
// all characters have the same width, except for tabs for which a
// tabwidth must be specified. Column cells must be tab-terminated, not
// tab-separated: non-tab terminated trailing text at the end of a line
// forms a cell but that cell is not part of an aligned column.
// For instance, in this example (where | stands for a horizontal tab):
//
//	aaaa|bbb|d
//	aa  |b  |dd
//	a   |
//	aa  |cccc|eee
//
// the b and c are in distinct columns (the b column is not contiguous
// all the way). The d and e are not in a column at all (there's no
// terminating tab, nor would the column be contiguous).
//
// The Writer assumes that all Unicode code points have the same width;
// this may not be true in some fonts or if the string contains combining
// characters.
//
// If DiscardEmptyColumns is set, empty columns that are terminated
// entirely by vertical (or "soft") tabs are discarded. Columns
// terminated by horizontal (or "hard") tabs are not affected by
// this flag.
//
// If a Writer is configured to filter HTML, HTML tags and entities
// are passed through. The widths of tags and entities are
// assumed to be zero (tags) and one (entities) for formatting purposes.
//
// A segment of text may be escaped by bracketing it with Escape
// characters. The tabwriter passes escaped text segments through
// unchanged. In particular, it does not interpret any tabs or line
// breaks within the segment. If the StripEscape flag is set, the
// Escape characters are stripped from the output; otherwise they
// are passed through as well. For the purpose of formatting, the
// width of the escaped text is always computed excluding the Escape
// characters.
//
// The formfeed character acts like a newline but it also terminates
// all columns in the current line (effectively calling Flush). Tab-
// terminated cells in the next line start new columns. Unless found
// inside an HTML tag or inside an escaped text segment, formfeed
// characters appear as newlines in the output.
//
// The Writer must buffer input internally, because proper spacing
// of one line may depend on the cells in future lines. Clients must
// call Flush when done calling Write.
type Writer struct {
	// configuration (set by Init)
	output   io.Writer // destination of the formatted text
	minwidth int       // minimal cell width including any padding
	tabwidth int       // width of a tab character in the output
	padding  int       // padding added to a cell before computing its width
	padbytes [8]byte   // the padding character, repeated to fill the array
	flags    uint      // formatting flags (FilterHTML, StripEscape, ...)

	// current state
	buf     []byte   // collected text excluding tabs or line breaks
	pos     int      // buffer position up to which cell.width of incomplete cell has been computed
	cell    cell     // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
	endChar byte     // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
	lines   [][]cell // list of lines; each line is a list of cells
	widths  []int    // list of column widths in runes - re-used during formatting
}
	 108  
	 109  // addLine adds a new line.
	 110  // flushed is a hint indicating whether the underlying writer was just flushed.
	 111  // If so, the previous line is not likely to be a good indicator of the new line's cells.
	 112  func (b *Writer) addLine(flushed bool) {
	 113  	// Grow slice instead of appending,
	 114  	// as that gives us an opportunity
	 115  	// to re-use an existing []cell.
	 116  	if n := len(b.lines) + 1; n <= cap(b.lines) {
	 117  		b.lines = b.lines[:n]
	 118  		b.lines[n-1] = b.lines[n-1][:0]
	 119  	} else {
	 120  		b.lines = append(b.lines, nil)
	 121  	}
	 122  
	 123  	if !flushed {
	 124  		// The previous line is probably a good indicator
	 125  		// of how many cells the current line will have.
	 126  		// If the current line's capacity is smaller than that,
	 127  		// abandon it and make a new one.
	 128  		if n := len(b.lines); n >= 2 {
	 129  			if prev := len(b.lines[n-2]); prev > cap(b.lines[n-1]) {
	 130  				b.lines[n-1] = make([]cell, 0, prev)
	 131  			}
	 132  		}
	 133  	}
	 134  }
	 135  
	 136  // Reset the current state.
	 137  func (b *Writer) reset() {
	 138  	b.buf = b.buf[:0]
	 139  	b.pos = 0
	 140  	b.cell = cell{}
	 141  	b.endChar = 0
	 142  	b.lines = b.lines[0:0]
	 143  	b.widths = b.widths[0:0]
	 144  	b.addLine(true)
	 145  }
	 146  
	 147  // Internal representation (current state):
	 148  //
	 149  // - all text written is appended to buf; tabs and line breaks are stripped away
	 150  // - at any given time there is a (possibly empty) incomplete cell at the end
	 151  //	 (the cell starts after a tab or line break)
	 152  // - cell.size is the number of bytes belonging to the cell so far
	 153  // - cell.width is text width in runes of that cell from the start of the cell to
	 154  //	 position pos; html tags and entities are excluded from this width if html
	 155  //	 filtering is enabled
	 156  // - the sizes and widths of processed text are kept in the lines list
	 157  //	 which contains a list of cells for each line
	 158  // - the widths list is a temporary list with current widths used during
	 159  //	 formatting; it is kept in Writer because it's re-used
	 160  //
//                    |<---------- size ---------->|
//                    |                            |
//                    |<- width ->|<- ignored ->|  |
//                    |           |             |  |
// [---processed---tab------------<tag>...</tag>...]
// ^                  ^                         ^
// |                  |                         |
// buf                start of incomplete cell  pos
	 169  
// Formatting can be controlled with these flags.
// The flags are distinct bits (1 << iota) and may be OR'ed together.
const (
	// Ignore html tags and treat entities (starting with '&'
	// and ending in ';') as single characters (width = 1).
	FilterHTML uint = 1 << iota

	// Strip Escape characters bracketing escaped text segments
	// instead of passing them through unchanged with the text.
	StripEscape

	// Force right-alignment of cell content.
	// Default is left-alignment.
	AlignRight

	// Handle empty columns as if they were not present in
	// the input in the first place.
	DiscardEmptyColumns

	// Always use tabs for indentation columns (i.e., padding of
	// leading empty cells on the left) independent of padchar.
	TabIndent

	// Print a vertical bar ('|') between columns (after formatting).
	// Discarded columns appear as zero-width columns ("||").
	Debug
)
	 196  
	 197  // A Writer must be initialized with a call to Init. The first parameter (output)
	 198  // specifies the filter output. The remaining parameters control the formatting:
	 199  //
	 200  //	minwidth	minimal cell width including any padding
	 201  //	tabwidth	width of tab characters (equivalent number of spaces)
	 202  //	padding		padding added to a cell before computing its width
	 203  //	padchar		ASCII char used for padding
	 204  //			if padchar == '\t', the Writer will assume that the
	 205  //			width of a '\t' in the formatted output is tabwidth,
	 206  //			and cells are left-aligned independent of align_left
	 207  //			(for correct-looking results, tabwidth must correspond
	 208  //			to the tab width in the viewer displaying the result)
	 209  //	flags		formatting control
	 210  //
	 211  func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
	 212  	if minwidth < 0 || tabwidth < 0 || padding < 0 {
	 213  		panic("negative minwidth, tabwidth, or padding")
	 214  	}
	 215  	b.output = output
	 216  	b.minwidth = minwidth
	 217  	b.tabwidth = tabwidth
	 218  	b.padding = padding
	 219  	for i := range b.padbytes {
	 220  		b.padbytes[i] = padchar
	 221  	}
	 222  	if padchar == '\t' {
	 223  		// tab padding enforces left-alignment
	 224  		flags &^= AlignRight
	 225  	}
	 226  	b.flags = flags
	 227  
	 228  	b.reset()
	 229  
	 230  	return b
	 231  }
	 232  
	 233  // debugging support (keep code around)
	 234  func (b *Writer) dump() {
	 235  	pos := 0
	 236  	for i, line := range b.lines {
	 237  		print("(", i, ") ")
	 238  		for _, c := range line {
	 239  			print("[", string(b.buf[pos:pos+c.size]), "]")
	 240  			pos += c.size
	 241  		}
	 242  		print("\n")
	 243  	}
	 244  	print("\n")
	 245  }
	 246  
// osError is a local error wrapper so we can distinguish errors we want to
// return as errors from genuine panics (which we don't want to return as
// errors). write0 panics with an osError; handlePanic unwraps it again.
type osError struct {
	err error // the error reported by the underlying output
}
	 252  
	 253  func (b *Writer) write0(buf []byte) {
	 254  	n, err := b.output.Write(buf)
	 255  	if n != len(buf) && err == nil {
	 256  		err = io.ErrShortWrite
	 257  	}
	 258  	if err != nil {
	 259  		panic(osError{err})
	 260  	}
	 261  }
	 262  
	 263  func (b *Writer) writeN(src []byte, n int) {
	 264  	for n > len(src) {
	 265  		b.write0(src)
	 266  		n -= len(src)
	 267  	}
	 268  	b.write0(src[0:n])
	 269  }
	 270  
// Pre-built byte slices handed to write0/writeN.
var (
	newline = []byte{'\n'}               // line terminator written between lines
	tabs    = []byte("\t\t\t\t\t\t\t\t") // source of tab characters for tab padding
)
	 275  
	 276  func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
	 277  	if b.padbytes[0] == '\t' || useTabs {
	 278  		// padding is done with tabs
	 279  		if b.tabwidth == 0 {
	 280  			return // tabs have no width - can't do any padding
	 281  		}
	 282  		// make cellw the smallest multiple of b.tabwidth
	 283  		cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
	 284  		n := cellw - textw // amount of padding
	 285  		if n < 0 {
	 286  			panic("internal error")
	 287  		}
	 288  		b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
	 289  		return
	 290  	}
	 291  
	 292  	// padding is done with non-tab characters
	 293  	b.writeN(b.padbytes[0:], cellw-textw)
	 294  }
	 295  
// vbar is the column separator written between cells when Debug is set.
var vbar = []byte{'|'}
	 297  
	 298  func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
	 299  	pos = pos0
	 300  	for i := line0; i < line1; i++ {
	 301  		line := b.lines[i]
	 302  
	 303  		// if TabIndent is set, use tabs to pad leading empty cells
	 304  		useTabs := b.flags&TabIndent != 0
	 305  
	 306  		for j, c := range line {
	 307  			if j > 0 && b.flags&Debug != 0 {
	 308  				// indicate column break
	 309  				b.write0(vbar)
	 310  			}
	 311  
	 312  			if c.size == 0 {
	 313  				// empty cell
	 314  				if j < len(b.widths) {
	 315  					b.writePadding(c.width, b.widths[j], useTabs)
	 316  				}
	 317  			} else {
	 318  				// non-empty cell
	 319  				useTabs = false
	 320  				if b.flags&AlignRight == 0 { // align left
	 321  					b.write0(b.buf[pos : pos+c.size])
	 322  					pos += c.size
	 323  					if j < len(b.widths) {
	 324  						b.writePadding(c.width, b.widths[j], false)
	 325  					}
	 326  				} else { // align right
	 327  					if j < len(b.widths) {
	 328  						b.writePadding(c.width, b.widths[j], false)
	 329  					}
	 330  					b.write0(b.buf[pos : pos+c.size])
	 331  					pos += c.size
	 332  				}
	 333  			}
	 334  		}
	 335  
	 336  		if i+1 == len(b.lines) {
	 337  			// last buffered line - we don't have a newline, so just write
	 338  			// any outstanding buffered data
	 339  			b.write0(b.buf[pos : pos+b.cell.size])
	 340  			pos += b.cell.size
	 341  		} else {
	 342  			// not the last line - write newline
	 343  			b.write0(newline)
	 344  		}
	 345  	}
	 346  	return
	 347  }
	 348  
// format formats and writes the text between line0 and line1 (excluding
// line1); pos0 is the buffer position corresponding to the beginning of
// line0. It returns the buffer position corresponding to the beginning of
// line1. Output errors are not returned; they surface as panics raised by
// write0.
//
// On entry, len(b.widths) is the index of the column to be laid out next;
// the widths of all columns to its left are already in b.widths. format
// calls itself recursively, one level per column.
func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
	pos = pos0
	column := len(b.widths)
	for this := line0; this < line1; this++ {
		line := b.lines[this]

		if column >= len(line)-1 {
			continue
		}
		// cell exists in this column => this line
		// has more cells than the previous line
		// (the last cell per line is ignored because cells are
		// tab-terminated; the last cell per line describes the
		// text before the newline/formfeed and does not belong
		// to a column)

		// print unprinted lines until beginning of block
		pos = b.writeLines(pos, line0, this)
		line0 = this

		// column block begin
		width := b.minwidth // minimal column width
		discardable := true // true if all cells in this column are empty and "soft"
		for ; this < line1; this++ {
			line = b.lines[this]
			if column >= len(line)-1 {
				break
			}
			// cell exists in this column
			c := line[column]
			// update width
			if w := c.width + b.padding; w > width {
				width = w
			}
			// update discardable
			if c.width > 0 || c.htab {
				discardable = false
			}
		}
		// column block end

		// discard empty columns if necessary
		if discardable && b.flags&DiscardEmptyColumns != 0 {
			width = 0
		}

		// format and print all columns to the right of this column
		// (we know the widths of this column and all columns to the left)
		b.widths = append(b.widths, width) // push width
		pos = b.format(pos, line0, this)
		b.widths = b.widths[0 : len(b.widths)-1] // pop width
		line0 = this
	}

	// print unprinted lines until end
	return b.writeLines(pos, line0, line1)
}
	 411  
	 412  // Append text to current cell.
	 413  func (b *Writer) append(text []byte) {
	 414  	b.buf = append(b.buf, text...)
	 415  	b.cell.size += len(text)
	 416  }
	 417  
	 418  // Update the cell width.
	 419  func (b *Writer) updateWidth() {
	 420  	b.cell.width += utf8.RuneCount(b.buf[b.pos:])
	 421  	b.pos = len(b.buf)
	 422  }
	 423  
// To escape a text segment, bracket it with Escape characters.
// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
// does not terminate a cell and constitutes a single character of
// width one for formatting purposes.
//
// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
const Escape = '\xff'
	 432  
	 433  // Start escaped mode.
	 434  func (b *Writer) startEscape(ch byte) {
	 435  	switch ch {
	 436  	case Escape:
	 437  		b.endChar = Escape
	 438  	case '<':
	 439  		b.endChar = '>'
	 440  	case '&':
	 441  		b.endChar = ';'
	 442  	}
	 443  }
	 444  
	 445  // Terminate escaped mode. If the escaped text was an HTML tag, its width
	 446  // is assumed to be zero for formatting purposes; if it was an HTML entity,
	 447  // its width is assumed to be one. In all other cases, the width is the
	 448  // unicode width of the text.
	 449  //
	 450  func (b *Writer) endEscape() {
	 451  	switch b.endChar {
	 452  	case Escape:
	 453  		b.updateWidth()
	 454  		if b.flags&StripEscape == 0 {
	 455  			b.cell.width -= 2 // don't count the Escape chars
	 456  		}
	 457  	case '>': // tag of zero width
	 458  	case ';':
	 459  		b.cell.width++ // entity, count as one rune
	 460  	}
	 461  	b.pos = len(b.buf)
	 462  	b.endChar = 0
	 463  }
	 464  
	 465  // Terminate the current cell by adding it to the list of cells of the
	 466  // current line. Returns the number of cells in that line.
	 467  //
	 468  func (b *Writer) terminateCell(htab bool) int {
	 469  	b.cell.htab = htab
	 470  	line := &b.lines[len(b.lines)-1]
	 471  	*line = append(*line, b.cell)
	 472  	b.cell = cell{}
	 473  	return len(*line)
	 474  }
	 475  
	 476  func (b *Writer) handlePanic(err *error, op string) {
	 477  	if e := recover(); e != nil {
	 478  		if op == "Flush" {
	 479  			// If Flush ran into a panic, we still need to reset.
	 480  			b.reset()
	 481  		}
	 482  		if nerr, ok := e.(osError); ok {
	 483  			*err = nerr.err
	 484  			return
	 485  		}
	 486  		panic("tabwriter: panic during " + op)
	 487  	}
	 488  }
	 489  
	 490  // Flush should be called after the last call to Write to ensure
	 491  // that any data buffered in the Writer is written to output. Any
	 492  // incomplete escape sequence at the end is considered
	 493  // complete for formatting purposes.
	 494  func (b *Writer) Flush() error {
	 495  	return b.flush()
	 496  }
	 497  
	 498  // flush is the internal version of Flush, with a named return value which we
	 499  // don't want to expose.
	 500  func (b *Writer) flush() (err error) {
	 501  	defer b.handlePanic(&err, "Flush")
	 502  	b.flushNoDefers()
	 503  	return nil
	 504  }
	 505  
	 506  // flushNoDefers is like flush, but without a deferred handlePanic call. This
	 507  // can be called from other methods which already have their own deferred
	 508  // handlePanic calls, such as Write, and avoid the extra defer work.
	 509  func (b *Writer) flushNoDefers() {
	 510  	// add current cell if not empty
	 511  	if b.cell.size > 0 {
	 512  		if b.endChar != 0 {
	 513  			// inside escape - terminate it even if incomplete
	 514  			b.endEscape()
	 515  		}
	 516  		b.terminateCell(false)
	 517  	}
	 518  
	 519  	// format contents of buffer
	 520  	b.format(0, 0, len(b.lines))
	 521  	b.reset()
	 522  }
	 523  
// hbar is the section-break marker Write emits after a formfeed-triggered
// flush when Debug is set.
var hbar = []byte("---\n")
	 525  
// Write writes buf to the writer b.
// The only errors returned are ones encountered
// while writing to the underlying output stream.
//
// Write scans buf byte by byte, splitting it into cells. Text is copied
// into the internal buffer; nothing reaches the output until a flush is
// triggered, either by a formfeed or by a line consisting of a single cell.
func (b *Writer) Write(buf []byte) (n int, err error) {
	defer b.handlePanic(&err, "Write")

	// split text into cells
	// (within the loop, n is the index of the first byte of buf
	// that has not been appended to the internal buffer yet)
	n = 0
	for i, ch := range buf {
		if b.endChar == 0 {
			// outside escape
			switch ch {
			case '\t', '\v', '\n', '\f':
				// end of cell
				b.append(buf[n:i])
				b.updateWidth()
				n = i + 1 // ch consumed
				ncells := b.terminateCell(ch == '\t')
				if ch == '\n' || ch == '\f' {
					// terminate line
					b.addLine(ch == '\f')
					if ch == '\f' || ncells == 1 {
						// A '\f' always forces a flush. Otherwise, if the previous
						// line has only one cell which does not have an impact on
						// the formatting of the following lines (the last cell per
						// line is ignored by format()), thus we can flush the
						// Writer contents.
						b.flushNoDefers()
						if ch == '\f' && b.flags&Debug != 0 {
							// indicate section break
							b.write0(hbar)
						}
					}
				}

			case Escape:
				// start of escaped sequence
				b.append(buf[n:i])
				b.updateWidth()
				n = i
				if b.flags&StripEscape != 0 {
					n++ // strip Escape
				}
				b.startEscape(Escape)

			case '<', '&':
				// possibly an html tag/entity
				if b.flags&FilterHTML != 0 {
					// begin of tag/entity
					b.append(buf[n:i])
					b.updateWidth()
					n = i
					b.startEscape(ch)
				}
			}

		} else {
			// inside escape
			if ch == b.endChar {
				// end of tag/entity
				j := i + 1
				if ch == Escape && b.flags&StripEscape != 0 {
					j = i // strip Escape
				}
				b.append(buf[n:j])
				n = i + 1 // ch consumed
				b.endEscape()
			}
		}
	}

	// append leftover text
	// (its width is not computed here; b.pos stays behind until the
	// next updateWidth or endEscape call)
	b.append(buf[n:])
	n = len(buf)
	return
}
	 603  
	 604  // NewWriter allocates and initializes a new tabwriter.Writer.
	 605  // The parameters are the same as for the Init function.
	 606  //
	 607  func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
	 608  	return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
	 609  }
	 610  

View as plain text