...

Source file src/encoding/json/stream.go

Documentation: encoding/json

		 1  // Copyright 2010 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package json
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"errors"
		10  	"io"
		11  )
		12  
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // input source; read from by refill
	buf     []byte      // data read from r; buf[scanp:] has not been consumed yet
	d       decodeState // decode state re-initialized by Decode for each value
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner used by readValue to find the end of a value
	err     error       // error saved by readValue; once set, Decode returns it immediately

	tokenState int   // current state of the Token API (one of the token* constants)
	tokenStack []int // saved tokenState values: pushed on [ or {, popped on ] or }
}
		26  
		27  // NewDecoder returns a new decoder that reads from r.
		28  //
		29  // The decoder introduces its own buffering and may
		30  // read data from r beyond the JSON values requested.
		31  func NewDecoder(r io.Reader) *Decoder {
		32  	return &Decoder{r: r}
		33  }
		34  
		35  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
		36  // Number instead of as a float64.
		37  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
		38  
		39  // DisallowUnknownFields causes the Decoder to return an error when the destination
		40  // is a struct and the input contains object keys which do not match any
		41  // non-ignored, exported fields in the destination.
		42  func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
		43  
		44  // Decode reads the next JSON-encoded value from its
		45  // input and stores it in the value pointed to by v.
		46  //
		47  // See the documentation for Unmarshal for details about
		48  // the conversion of JSON into a Go value.
		49  func (dec *Decoder) Decode(v interface{}) error {
		50  	if dec.err != nil {
		51  		return dec.err
		52  	}
		53  
		54  	if err := dec.tokenPrepareForDecode(); err != nil {
		55  		return err
		56  	}
		57  
		58  	if !dec.tokenValueAllowed() {
		59  		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
		60  	}
		61  
		62  	// Read whole value into buffer.
		63  	n, err := dec.readValue()
		64  	if err != nil {
		65  		return err
		66  	}
		67  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
		68  	dec.scanp += n
		69  
		70  	// Don't save err from unmarshal into dec.err:
		71  	// the connection is still usable since we read a complete JSON
		72  	// object from it before the error happened.
		73  	err = dec.d.unmarshal(v)
		74  
		75  	// fixup token streaming state
		76  	dec.tokenValueEnd()
		77  
		78  	return err
		79  }
		80  
		81  // Buffered returns a reader of the data remaining in the Decoder's
		82  // buffer. The reader is valid until the next call to Decode.
		83  func (dec *Decoder) Buffered() io.Reader {
		84  	return bytes.NewReader(dec.buf[dec.scanp:])
		85  }
		86  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// Scanning starts at dec.scanp, and refill is called whenever the buffered
// data runs out before the value is complete. The value is left in dec.buf
// starting at dec.scanp (refill may have moved it to the front of the
// buffer); advancing dec.scanp past it is left to the caller.
//
// On a scan error or a read error, the error is also stored in dec.err
// and the returned length is 0.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	scanp := dec.scanp
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// No more input will arrive: feed the scanner a space
				// to find out whether the buffered bytes already form
				// a complete value.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// If dec.buf holds any non-space byte, report
				// io.ErrUnexpectedEOF instead of io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the buffer down and reset dec.scanp, so keep
		// the scan position as an offset from dec.scanp across the call.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
	 145  
	 146  func (dec *Decoder) refill() error {
	 147  	// Make room to read more into the buffer.
	 148  	// First slide down data already consumed.
	 149  	if dec.scanp > 0 {
	 150  		dec.scanned += int64(dec.scanp)
	 151  		n := copy(dec.buf, dec.buf[dec.scanp:])
	 152  		dec.buf = dec.buf[:n]
	 153  		dec.scanp = 0
	 154  	}
	 155  
	 156  	// Grow buffer if not large enough.
	 157  	const minRead = 512
	 158  	if cap(dec.buf)-len(dec.buf) < minRead {
	 159  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
	 160  		copy(newBuf, dec.buf)
	 161  		dec.buf = newBuf
	 162  	}
	 163  
	 164  	// Read. Delay error for next iteration (after scan).
	 165  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
	 166  	dec.buf = dec.buf[0 : len(dec.buf)+n]
	 167  
	 168  	return err
	 169  }
	 170  
	 171  func nonSpace(b []byte) bool {
	 172  	for _, c := range b {
	 173  		if !isSpace(c) {
	 174  			return true
	 175  		}
	 176  	}
	 177  	return false
	 178  }
	 179  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded output
	err        error     // error from a failed w.Write; once set, Encode returns it immediately
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; NewEncoder sets it to true

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated by Encode on first use
	indentPrefix string        // prefix argument passed to Indent; set by SetIndent
	indentValue  string        // indent argument passed to Indent; set by SetIndent
}
	 190  
	 191  // NewEncoder returns a new encoder that writes to w.
	 192  func NewEncoder(w io.Writer) *Encoder {
	 193  	return &Encoder{w: w, escapeHTML: true}
	 194  }
	 195  
	 196  // Encode writes the JSON encoding of v to the stream,
	 197  // followed by a newline character.
	 198  //
	 199  // See the documentation for Marshal for details about the
	 200  // conversion of Go values to JSON.
	 201  func (enc *Encoder) Encode(v interface{}) error {
	 202  	if enc.err != nil {
	 203  		return enc.err
	 204  	}
	 205  	e := newEncodeState()
	 206  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
	 207  	if err != nil {
	 208  		return err
	 209  	}
	 210  
	 211  	// Terminate each value with a newline.
	 212  	// This makes the output look a little nicer
	 213  	// when debugging, and some kind of space
	 214  	// is required if the encoded value was a number,
	 215  	// so that the reader knows there aren't more
	 216  	// digits coming.
	 217  	e.WriteByte('\n')
	 218  
	 219  	b := e.Bytes()
	 220  	if enc.indentPrefix != "" || enc.indentValue != "" {
	 221  		if enc.indentBuf == nil {
	 222  			enc.indentBuf = new(bytes.Buffer)
	 223  		}
	 224  		enc.indentBuf.Reset()
	 225  		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
	 226  		if err != nil {
	 227  			return err
	 228  		}
	 229  		b = enc.indentBuf.Bytes()
	 230  	}
	 231  	if _, err = enc.w.Write(b); err != nil {
	 232  		enc.err = err
	 233  	}
	 234  	encodeStatePool.Put(e)
	 235  	return err
	 236  }
	 237  
	 238  // SetIndent instructs the encoder to format each subsequent encoded
	 239  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
	 240  // Calling SetIndent("", "") disables indentation.
	 241  func (enc *Encoder) SetIndent(prefix, indent string) {
	 242  	enc.indentPrefix = prefix
	 243  	enc.indentValue = indent
	 244  }
	 245  
	 246  // SetEscapeHTML specifies whether problematic HTML characters
	 247  // should be escaped inside JSON quoted strings.
	 248  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
	 249  // to avoid certain safety problems that can arise when embedding JSON in HTML.
	 250  //
	 251  // In non-HTML settings where the escaping interferes with the readability
	 252  // of the output, SetEscapeHTML(false) disables this behavior.
	 253  func (enc *Encoder) SetEscapeHTML(on bool) {
	 254  	enc.escapeHTML = on
	 255  }
	 256  
	 257  // RawMessage is a raw encoded JSON value.
	 258  // It implements Marshaler and Unmarshaler and can
	 259  // be used to delay JSON decoding or precompute a JSON encoding.
	 260  type RawMessage []byte
	 261  
	 262  // MarshalJSON returns m as the JSON encoding of m.
	 263  func (m RawMessage) MarshalJSON() ([]byte, error) {
	 264  	if m == nil {
	 265  		return []byte("null"), nil
	 266  	}
	 267  	return m, nil
	 268  }
	 269  
	 270  // UnmarshalJSON sets *m to a copy of data.
	 271  func (m *RawMessage) UnmarshalJSON(data []byte) error {
	 272  	if m == nil {
	 273  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
	 274  	}
	 275  	*m = append((*m)[0:0], data...)
	 276  	return nil
	 277  }
	 278  
	 279  var _ Marshaler = (*RawMessage)(nil)
	 280  var _ Unmarshaler = (*RawMessage)(nil)
	 281  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (instead of float64, after UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
	 292  
// States of the Token API, stored in Decoder.tokenState.
// tokenTopValue is the zero value and therefore the state of a new Decoder.
const (
	// Not inside any array or object opened through Token; a value may follow.
	tokenTopValue = iota
	// Just after '['; a value or ']' may follow.
	tokenArrayStart
	// Just after ',' in an array; a value must follow.
	tokenArrayValue
	// Just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// Just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// Just after ',' in an object; a key string must follow.
	tokenObjectKey
	// Just after an object key; ':' must follow.
	tokenObjectColon
	// Just after ':' in an object; a value must follow.
	tokenObjectValue
	// Just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
	 304  
	 305  // advance tokenstate from a separator state to a value state
	 306  func (dec *Decoder) tokenPrepareForDecode() error {
	 307  	// Note: Not calling peek before switch, to avoid
	 308  	// putting peek into the standard Decode path.
	 309  	// peek is only called when using the Token API.
	 310  	switch dec.tokenState {
	 311  	case tokenArrayComma:
	 312  		c, err := dec.peek()
	 313  		if err != nil {
	 314  			return err
	 315  		}
	 316  		if c != ',' {
	 317  			return &SyntaxError{"expected comma after array element", dec.InputOffset()}
	 318  		}
	 319  		dec.scanp++
	 320  		dec.tokenState = tokenArrayValue
	 321  	case tokenObjectColon:
	 322  		c, err := dec.peek()
	 323  		if err != nil {
	 324  			return err
	 325  		}
	 326  		if c != ':' {
	 327  			return &SyntaxError{"expected colon after object key", dec.InputOffset()}
	 328  		}
	 329  		dec.scanp++
	 330  		dec.tokenState = tokenObjectValue
	 331  	}
	 332  	return nil
	 333  }
	 334  
	 335  func (dec *Decoder) tokenValueAllowed() bool {
	 336  	switch dec.tokenState {
	 337  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
	 338  		return true
	 339  	}
	 340  	return false
	 341  }
	 342  
	 343  func (dec *Decoder) tokenValueEnd() {
	 344  	switch dec.tokenState {
	 345  	case tokenArrayStart, tokenArrayValue:
	 346  		dec.tokenState = tokenArrayComma
	 347  	case tokenObjectValue:
	 348  		dec.tokenState = tokenObjectComma
	 349  	}
	 350  }
	 351  
	 352  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
	 353  type Delim rune
	 354  
	 355  func (d Delim) String() string {
	 356  	return string(d)
	 357  }
	 358  
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed and skipped
	// without being returned to the caller.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then treat the whole array as
			// one completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Restore the enclosing state, then treat the whole object as
			// one completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and go around again for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Consume the comma and go around again for the next
			// array element or object key.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key. Decode refuses to run in these states,
				// so switch temporarily to tokenTopValue, where a value
				// is allowed and tokenValueEnd leaves the state alone.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				// The key was read; a colon must come next.
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Not in key position: the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode the value into an interface{}; Decode also advances
			// the token state past the value.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
	 463  
	 464  func (dec *Decoder) tokenError(c byte) (Token, error) {
	 465  	var context string
	 466  	switch dec.tokenState {
	 467  	case tokenTopValue:
	 468  		context = " looking for beginning of value"
	 469  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
	 470  		context = " looking for beginning of value"
	 471  	case tokenArrayComma:
	 472  		context = " after array element"
	 473  	case tokenObjectKey:
	 474  		context = " looking for beginning of object key string"
	 475  	case tokenObjectColon:
	 476  		context = " after object key"
	 477  	case tokenObjectComma:
	 478  		context = " after object key:value pair"
	 479  	}
	 480  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
	 481  }
	 482  
	 483  // More reports whether there is another element in the
	 484  // current array or object being parsed.
	 485  func (dec *Decoder) More() bool {
	 486  	c, err := dec.peek()
	 487  	return err == nil && c != ']' && c != '}'
	 488  }
	 489  
	 490  func (dec *Decoder) peek() (byte, error) {
	 491  	var err error
	 492  	for {
	 493  		for i := dec.scanp; i < len(dec.buf); i++ {
	 494  			c := dec.buf[i]
	 495  			if isSpace(c) {
	 496  				continue
	 497  			}
	 498  			dec.scanp = i
	 499  			return c, nil
	 500  		}
	 501  		// buffer has been scanned, now report any error
	 502  		if err != nil {
	 503  			return 0, err
	 504  		}
	 505  		err = dec.refill()
	 506  	}
	 507  }
	 508  
	 509  // InputOffset returns the input stream byte offset of the current decoder position.
	 510  // The offset gives the location of the end of the most recently returned token
	 511  // and the beginning of the next token.
	 512  func (dec *Decoder) InputOffset() int64 {
	 513  	return dec.scanned + int64(dec.scanp)
	 514  }
	 515  

View as plain text