Source file src/encoding/json/stream.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // input source; refill reads from it into buf
	buf     []byte      // data read from r; buf[scanp:] has not been consumed yet
	d       decodeState // state Decode initializes with each value before unmarshaling it
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned
	scan    scanner     // scanner readValue uses to find where a value ends
	err     error       // sticky error; once set, Decode returns it without reading

	tokenState int   // current position in the token grammar; one of the token* constants
	tokenStack []int // tokenState values saved by Token on '[' and '{', restored on ']' and '}'
}
    26  
    27  // NewDecoder returns a new decoder that reads from r.
    28  //
    29  // The decoder introduces its own buffering and may
    30  // read data from r beyond the JSON values requested.
    31  func NewDecoder(r io.Reader) *Decoder {
    32  	return &Decoder{r: r}
    33  }
    34  
    35  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    36  // [Number] instead of as a float64.
    37  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    38  
    39  // DisallowUnknownFields causes the Decoder to return an error when the destination
    40  // is a struct and the input contains object keys which do not match any
    41  // non-ignored, exported fields in the destination.
    42  func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
    43  
    44  // Decode reads the next JSON-encoded value from its
    45  // input and stores it in the value pointed to by v.
    46  //
    47  // See the documentation for [Unmarshal] for details about
    48  // the conversion of JSON into a Go value.
    49  func (dec *Decoder) Decode(v any) error {
    50  	if dec.err != nil {
    51  		return dec.err
    52  	}
    53  
    54  	if err := dec.tokenPrepareForDecode(); err != nil {
    55  		return err
    56  	}
    57  
    58  	if !dec.tokenValueAllowed() {
    59  		return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()}
    60  	}
    61  
    62  	// Read whole value into buffer.
    63  	n, err := dec.readValue()
    64  	if err != nil {
    65  		return err
    66  	}
    67  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    68  	dec.scanp += n
    69  
    70  	// Don't save err from unmarshal into dec.err:
    71  	// the connection is still usable since we read a complete JSON
    72  	// object from it before the error happened.
    73  	err = dec.d.unmarshal(v)
    74  
    75  	// fixup token streaming state
    76  	dec.tokenValueEnd()
    77  
    78  	return err
    79  }
    80  
    81  // Buffered returns a reader of the data remaining in the Decoder's
    82  // buffer. The reader is valid until the next call to [Decoder.Decode].
    83  func (dec *Decoder) Buffered() io.Reader {
    84  	return bytes.NewReader(dec.buf[dec.scanp:])
    85  }
    86  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value begins at dec.buf[dec.scanp]. readValue does not consume it:
// the caller advances dec.scanp by the returned length. A scanner error or
// a read error is both returned and stored in dec.err.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor into dec.buf; dec.scanp keeps marking the
	// start of the value.
	scanp := dec.scanp
	// err holds the result of the most recent refill. It is examined only
	// after the bytes that refill delivered have been scanned.
	var err error
Input:
	// help the compiler see that scanp is never negative, so it can remove
	// some bounds checks below.
	for scanp >= 0 {

		// Look in the buffer for a new value.
		for ; scanp < len(dec.buf); scanp++ {
			c := dec.buf[scanp]
			dec.scan.bytes++
			switch dec.scan.step(&dec.scan, c) {
			case scanEnd:
				// scanEnd is delayed one byte so we decrement
				// the scanner bytes count by 1 to ensure that
				// this value is correct in the next call of Decode.
				dec.scan.bytes--
				break Input
			case scanEndObject, scanEndArray:
				// scanEnd is delayed one byte.
				// We might block trying to get that byte from src,
				// so instead invent a space byte.
				if stateEndValue(&dec.scan, ' ') == scanEnd {
					// Include the closing delimiter in the value.
					scanp++
					break Input
				}
			case scanError:
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Feed the scanner a space: if that ends the value, what
				// has been scanned so far is complete (presumably the case
				// for a value with no closing delimiter, such as a number).
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Otherwise, if the buffer holds anything but white space,
				// report io.ErrUnexpectedEOF instead of a plain io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of dec.buf and
		// reset dec.scanp, so carry the cursor across the call as an
		// offset from dec.scanp.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   145  
   146  func (dec *Decoder) refill() error {
   147  	// Make room to read more into the buffer.
   148  	// First slide down data already consumed.
   149  	if dec.scanp > 0 {
   150  		dec.scanned += int64(dec.scanp)
   151  		n := copy(dec.buf, dec.buf[dec.scanp:])
   152  		dec.buf = dec.buf[:n]
   153  		dec.scanp = 0
   154  	}
   155  
   156  	// Grow buffer if not large enough.
   157  	const minRead = 512
   158  	if cap(dec.buf)-len(dec.buf) < minRead {
   159  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   160  		copy(newBuf, dec.buf)
   161  		dec.buf = newBuf
   162  	}
   163  
   164  	// Read. Delay error for next iteration (after scan).
   165  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   166  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   167  
   168  	return err
   169  }
   170  
   171  func nonSpace(b []byte) bool {
   172  	for _, c := range b {
   173  		if !isSpace(c) {
   174  			return true
   175  		}
   176  	}
   177  	return false
   178  }
   179  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination that Encode writes to
	err        error     // sticky error from a failed write; returned by later Encode calls
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; set by SetEscapeHTML

	indentBuf    []byte // scratch buffer Encode reuses for indented output
	indentPrefix string // prefix argument set by SetIndent
	indentValue  string // indent argument set by SetIndent
}
   190  
   191  // NewEncoder returns a new encoder that writes to w.
   192  func NewEncoder(w io.Writer) *Encoder {
   193  	return &Encoder{w: w, escapeHTML: true}
   194  }
   195  
   196  // Encode writes the JSON encoding of v to the stream,
   197  // with insignificant space characters elided,
   198  // followed by a newline character.
   199  //
   200  // See the documentation for [Marshal] for details about the
   201  // conversion of Go values to JSON.
   202  func (enc *Encoder) Encode(v any) error {
   203  	if enc.err != nil {
   204  		return enc.err
   205  	}
   206  
   207  	e := newEncodeState()
   208  	defer encodeStatePool.Put(e)
   209  
   210  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   211  	if err != nil {
   212  		return err
   213  	}
   214  
   215  	// Terminate each value with a newline.
   216  	// This makes the output look a little nicer
   217  	// when debugging, and some kind of space
   218  	// is required if the encoded value was a number,
   219  	// so that the reader knows there aren't more
   220  	// digits coming.
   221  	e.WriteByte('\n')
   222  
   223  	b := e.Bytes()
   224  	if enc.indentPrefix != "" || enc.indentValue != "" {
   225  		enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue)
   226  		if err != nil {
   227  			return err
   228  		}
   229  		b = enc.indentBuf
   230  	}
   231  	if _, err = enc.w.Write(b); err != nil {
   232  		enc.err = err
   233  	}
   234  	return err
   235  }
   236  
   237  // SetIndent instructs the encoder to format each subsequent encoded
   238  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   239  // Calling SetIndent("", "") disables indentation.
   240  func (enc *Encoder) SetIndent(prefix, indent string) {
   241  	enc.indentPrefix = prefix
   242  	enc.indentValue = indent
   243  }
   244  
   245  // SetEscapeHTML specifies whether problematic HTML characters
   246  // should be escaped inside JSON quoted strings.
   247  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   248  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   249  //
   250  // In non-HTML settings where the escaping interferes with the readability
   251  // of the output, SetEscapeHTML(false) disables this behavior.
   252  func (enc *Encoder) SetEscapeHTML(on bool) {
   253  	enc.escapeHTML = on
   254  }
   255  
   256  // RawMessage is a raw encoded JSON value.
   257  // It implements [Marshaler] and [Unmarshaler] and can
   258  // be used to delay JSON decoding or precompute a JSON encoding.
   259  type RawMessage []byte
   260  
   261  // MarshalJSON returns m as the JSON encoding of m.
   262  func (m RawMessage) MarshalJSON() ([]byte, error) {
   263  	if m == nil {
   264  		return []byte("null"), nil
   265  	}
   266  	return m, nil
   267  }
   268  
   269  // UnmarshalJSON sets *m to a copy of data.
   270  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   271  	if m == nil {
   272  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   273  	}
   274  	*m = append((*m)[0:0], data...)
   275  	return nil
   276  }
   277  
   278  var _ Marshaler = (*RawMessage)(nil)
   279  var _ Unmarshaler = (*RawMessage)(nil)
   280  
// A Token holds a value of one of these types:
//
//   - [Delim], for the four JSON delimiters [ ] { }
//   - bool, for JSON booleans
//   - float64, for JSON numbers (the default)
//   - [Number], for JSON numbers (after [Decoder.UseNumber] has been called)
//   - string, for JSON string literals
//   - nil, for JSON null
type Token any
   290  
// Values of Decoder.tokenState. Each one records where the Decoder is in
// the input and therefore which input is legal next.
const (
	// tokenTopValue is the zero value: outside any array or object,
	// where a value may begin.
	tokenTopValue = iota
	// tokenArrayStart: just after '['; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: just after ',' in an array; a value must follow.
	tokenArrayValue
	// tokenArrayComma: just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: just after ',' in an object; a key string must follow.
	tokenObjectKey
	// tokenObjectColon: just after an object key; ':' must follow.
	tokenObjectColon
	// tokenObjectValue: just after ':'; a value must follow.
	tokenObjectValue
	// tokenObjectComma: just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
   302  
   303  // advance tokenstate from a separator state to a value state
   304  func (dec *Decoder) tokenPrepareForDecode() error {
   305  	// Note: Not calling peek before switch, to avoid
   306  	// putting peek into the standard Decode path.
   307  	// peek is only called when using the Token API.
   308  	switch dec.tokenState {
   309  	case tokenArrayComma:
   310  		c, err := dec.peek()
   311  		if err != nil {
   312  			return err
   313  		}
   314  		if c != ',' {
   315  			return &SyntaxError{"expected comma after array element", dec.InputOffset()}
   316  		}
   317  		dec.scanp++
   318  		dec.tokenState = tokenArrayValue
   319  	case tokenObjectColon:
   320  		c, err := dec.peek()
   321  		if err != nil {
   322  			return err
   323  		}
   324  		if c != ':' {
   325  			return &SyntaxError{"expected colon after object key", dec.InputOffset()}
   326  		}
   327  		dec.scanp++
   328  		dec.tokenState = tokenObjectValue
   329  	}
   330  	return nil
   331  }
   332  
   333  func (dec *Decoder) tokenValueAllowed() bool {
   334  	switch dec.tokenState {
   335  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   336  		return true
   337  	}
   338  	return false
   339  }
   340  
   341  func (dec *Decoder) tokenValueEnd() {
   342  	switch dec.tokenState {
   343  	case tokenArrayStart, tokenArrayValue:
   344  		dec.tokenState = tokenArrayComma
   345  	case tokenObjectValue:
   346  		dec.tokenState = tokenObjectComma
   347  	}
   348  }
   349  
   350  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   351  type Delim rune
   352  
   353  func (d Delim) String() string {
   354  	return string(d)
   355  }
   356  
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, [io.EOF].
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type [Delim]
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop because separators (':' and ',') are consumed without being
	// returned; the token that follows them is returned instead.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Legal only directly after '[' or after an array element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then mark the whole array as a
			// completed value within it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Legal only directly after '{' or after an object value.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then mark the whole object as a
			// completed value within it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Separator: consume it and go on to the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Separator: consume it and go on to the next element or key.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// The string is an object key. Decode refuses to run in a
				// key state, so switch to tokenTopValue for the duration
				// of the call and put the real state back afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Anywhere else a string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode reads the value and advances tokenState itself.
			var x any
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
   461  
   462  func (dec *Decoder) tokenError(c byte) (Token, error) {
   463  	var context string
   464  	switch dec.tokenState {
   465  	case tokenTopValue:
   466  		context = " looking for beginning of value"
   467  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   468  		context = " looking for beginning of value"
   469  	case tokenArrayComma:
   470  		context = " after array element"
   471  	case tokenObjectKey:
   472  		context = " looking for beginning of object key string"
   473  	case tokenObjectColon:
   474  		context = " after object key"
   475  	case tokenObjectComma:
   476  		context = " after object key:value pair"
   477  	}
   478  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()}
   479  }
   480  
   481  // More reports whether there is another element in the
   482  // current array or object being parsed.
   483  func (dec *Decoder) More() bool {
   484  	c, err := dec.peek()
   485  	return err == nil && c != ']' && c != '}'
   486  }
   487  
   488  func (dec *Decoder) peek() (byte, error) {
   489  	var err error
   490  	for {
   491  		for i := dec.scanp; i < len(dec.buf); i++ {
   492  			c := dec.buf[i]
   493  			if isSpace(c) {
   494  				continue
   495  			}
   496  			dec.scanp = i
   497  			return c, nil
   498  		}
   499  		// buffer has been scanned, now report any error
   500  		if err != nil {
   501  			return 0, err
   502  		}
   503  		err = dec.refill()
   504  	}
   505  }
   506  
   507  // InputOffset returns the input stream byte offset of the current decoder position.
   508  // The offset gives the location of the end of the most recently returned token
   509  // and the beginning of the next token.
   510  func (dec *Decoder) InputOffset() int64 {
   511  	return dec.scanned + int64(dec.scanp)
   512  }
   513  

View as plain text