...

Source file src/encoding/xml/xml_test.go

Documentation: encoding/xml

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  package xml
		 6  
		 7  import (
		 8  	"bytes"
		 9  	"fmt"
		10  	"io"
		11  	"reflect"
		12  	"strings"
		13  	"testing"
		14  	"unicode/utf8"
		15  )
		16  
		17  type toks struct {
		18  	earlyEOF bool
		19  	t				[]Token
		20  }
		21  
		22  func (t *toks) Token() (Token, error) {
		23  	if len(t.t) == 0 {
		24  		return nil, io.EOF
		25  	}
		26  	var tok Token
		27  	tok, t.t = t.t[0], t.t[1:]
		28  	if t.earlyEOF && len(t.t) == 0 {
		29  		return tok, io.EOF
		30  	}
		31  	return tok, nil
		32  }
		33  
		34  func TestDecodeEOF(t *testing.T) {
		35  	start := StartElement{Name: Name{Local: "test"}}
		36  	tests := []struct {
		37  		name	 string
		38  		tokens []Token
		39  		ok		 bool
		40  	}{
		41  		{
		42  			name: "OK",
		43  			tokens: []Token{
		44  				start,
		45  				start.End(),
		46  			},
		47  			ok: true,
		48  		},
		49  		{
		50  			name: "Malformed",
		51  			tokens: []Token{
		52  				start,
		53  				StartElement{Name: Name{Local: "bad"}},
		54  				start.End(),
		55  			},
		56  			ok: false,
		57  		},
		58  	}
		59  	for _, tc := range tests {
		60  		for _, eof := range []bool{true, false} {
		61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
		62  			t.Run(name, func(t *testing.T) {
		63  				d := NewTokenDecoder(&toks{
		64  					earlyEOF: eof,
		65  					t:				tc.tokens,
		66  				})
		67  				err := d.Decode(&struct {
		68  					XMLName Name `xml:"test"`
		69  				}{})
		70  				if tc.ok && err != nil {
		71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
		72  				}
		73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
		74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
		75  				}
		76  			})
		77  		}
		78  	}
		79  }
		80  
		81  type toksNil struct {
		82  	returnEOF bool
		83  	t				 []Token
		84  }
		85  
		86  func (t *toksNil) Token() (Token, error) {
		87  	if len(t.t) == 0 {
		88  		if !t.returnEOF {
		89  			// Return nil, nil before returning an EOF. It's legal, but
		90  			// discouraged.
		91  			t.returnEOF = true
		92  			return nil, nil
		93  		}
		94  		return nil, io.EOF
		95  	}
		96  	var tok Token
		97  	tok, t.t = t.t[0], t.t[1:]
		98  	return tok, nil
		99  }
	 100  
	 101  func TestDecodeNilToken(t *testing.T) {
	 102  	for _, strict := range []bool{true, false} {
	 103  		name := fmt.Sprintf("Strict=%v", strict)
	 104  		t.Run(name, func(t *testing.T) {
	 105  			start := StartElement{Name: Name{Local: "test"}}
	 106  			bad := StartElement{Name: Name{Local: "bad"}}
	 107  			d := NewTokenDecoder(&toksNil{
	 108  				// Malformed
	 109  				t: []Token{start, bad, start.End()},
	 110  			})
	 111  			d.Strict = strict
	 112  			err := d.Decode(&struct {
	 113  				XMLName Name `xml:"test"`
	 114  			}{})
	 115  			if _, ok := err.(*SyntaxError); !ok {
	 116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
	 117  			}
	 118  		})
	 119  	}
	 120  }
	 121  
// testInput is the XML document parsed by TestRawToken and TestToken. It
// contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined, numeric and custom entities, an empty element,
// a CDATA section and a trailing comment with no final newline. The <body>
// start tag is assembled with an interpreted "\r\n\t" literal so that a CR,
// LF and tab sit between the attribute list and the closing '>'.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `	>
	<hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
	<query>&何; &is-it;</query>
	<goodbye />
	<outer foo:attr="value" xmlns:tag="ns4">
		<inner/>
	</outer>
	<tag:name>
		<![CDATA[Some text here.]]>
	</tag:name>
</body><!-- missing final newline -->`
	 138  
// testEntity is the custom entity table installed as Decoder.Entity by the
// tests that parse testInput; it supplies the replacements for the two
// non-predefined entities used in the <query> element.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
	 140  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Element and attribute names keep their literal prefixes
// ("xmlns", "foo", "tag") in the Space field; no namespace translation is
// applied. The whitespace-only CharData tokens mirror the indentation in
// testInput byte for byte.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n	"),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n	"),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n	"),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n	"),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n		"),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n	"),
	EndElement{Name{"", "outer"}},
	CharData("\n	"),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n		"),
	CharData("Some text here."),
	CharData("\n	"),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
	 177  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. It parallels rawTokens, except that element names and the
// prefixed foo:attr attribute carry the namespace values declared in the
// document ("ns1", "ns2", "ns3") in their Space field instead of the literal
// prefix. The xmlns declarations themselves keep "xmlns" as written.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n	"),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n	"),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n	"),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n	"),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n		"),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n	"),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n	"),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n		"),
	CharData("Some text here."),
	CharData("\n	"),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
	 214  
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; the tests use it to exercise the
// decoder with and without a CharsetReader installed.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
	 218  
// rawTokensAltEncoding is the raw token sequence expected for
// testInputAltEncoding when the input is read through the lower-casing
// downCaser reader: the tag name and character data appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
	 227  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// with a default decoder and requires the first error to be a *SyntaxError.
// Two entries are commented out because, per their notes, handling them is
// left to the caller of Token.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
	 274  
	 275  func TestRawToken(t *testing.T) {
	 276  	d := NewDecoder(strings.NewReader(testInput))
	 277  	d.Entity = testEntity
	 278  	testRawToken(t, d, testInput, rawTokens)
	 279  }
	 280  
// nonStrictInput holds one <tag> element per line, each containing an
// ampersand sequence that is not a well-formed entity reference (missing
// semicolon, unknown name, bad numeric form, empty name, and so on). It is
// parsed by TestNonStrictRawToken with Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
	 291  
// nonStrictTokens is the raw token sequence expected for nonStrictInput in
// non-strict mode: every malformed ampersand sequence is expected to come
// through unchanged as character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
	 327  
	 328  func TestNonStrictRawToken(t *testing.T) {
	 329  	d := NewDecoder(strings.NewReader(nonStrictInput))
	 330  	d.Strict = false
	 331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
	 332  }
	 333  
	 334  type downCaser struct {
	 335  	t *testing.T
	 336  	r io.ByteReader
	 337  }
	 338  
	 339  func (d *downCaser) ReadByte() (c byte, err error) {
	 340  	c, err = d.r.ReadByte()
	 341  	if c >= 'A' && c <= 'Z' {
	 342  		c += 'a' - 'A'
	 343  	}
	 344  	return
	 345  }
	 346  
	 347  func (d *downCaser) Read(p []byte) (int, error) {
	 348  	d.t.Fatalf("unexpected Read call on downCaser reader")
	 349  	panic("unreachable")
	 350  }
	 351  
	 352  func TestRawTokenAltEncoding(t *testing.T) {
	 353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
	 354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
	 355  		if charset != "x-testing-uppercase" {
	 356  			t.Fatalf("unexpected charset %q", charset)
	 357  		}
	 358  		return &downCaser{t, input.(io.ByteReader)}, nil
	 359  	}
	 360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
	 361  }
	 362  
	 363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
	 364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
	 365  	token, err := d.RawToken()
	 366  	if token == nil {
	 367  		t.Fatalf("expected a token on first RawToken call")
	 368  	}
	 369  	if err != nil {
	 370  		t.Fatal(err)
	 371  	}
	 372  	token, err = d.RawToken()
	 373  	if token != nil {
	 374  		t.Errorf("expected a nil token; got %#v", token)
	 375  	}
	 376  	if err == nil {
	 377  		t.Fatalf("expected an error on second RawToken call")
	 378  	}
	 379  	const encoding = "x-testing-uppercase"
	 380  	if !strings.Contains(err.Error(), encoding) {
	 381  		t.Errorf("expected error to contain %q; got error: %v",
	 382  			encoding, err)
	 383  	}
	 384  }
	 385  
	 386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
	 387  	lastEnd := int64(0)
	 388  	for i, want := range rawTokens {
	 389  		start := d.InputOffset()
	 390  		have, err := d.RawToken()
	 391  		end := d.InputOffset()
	 392  		if err != nil {
	 393  			t.Fatalf("token %d: unexpected error: %s", i, err)
	 394  		}
	 395  		if !reflect.DeepEqual(have, want) {
	 396  			var shave, swant string
	 397  			if _, ok := have.(CharData); ok {
	 398  				shave = fmt.Sprintf("CharData(%q)", have)
	 399  			} else {
	 400  				shave = fmt.Sprintf("%#v", have)
	 401  			}
	 402  			if _, ok := want.(CharData); ok {
	 403  				swant = fmt.Sprintf("CharData(%q)", want)
	 404  			} else {
	 405  				swant = fmt.Sprintf("%#v", want)
	 406  			}
	 407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
	 408  		}
	 409  
	 410  		// Check that InputOffset returned actual token.
	 411  		switch {
	 412  		case start < lastEnd:
	 413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
	 414  		case start >= end:
	 415  			// Special case: EndElement can be synthesized.
	 416  			if start == end && end == lastEnd {
	 417  				break
	 418  			}
	 419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
	 420  		case end > int64(len(raw)):
	 421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
	 422  		default:
	 423  			text := raw[start:end]
	 424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
	 425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
	 426  			}
	 427  		}
	 428  		lastEnd = end
	 429  	}
	 430  }
	 431  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput holds one DOCTYPE per line, each wrapping a nested
// ENTITY declaration whose quoted value contains '<', '>' or a quote
// character of the other kind.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
	 445  
// nestedDirectivesTokens is the token sequence expected for
// nestedDirectivesInput: each Directive carries the full text between the
// outer "<!" and its matching ">", nested declaration included, and the
// lines are separated by newline character data.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
	 463  
	 464  func TestNestedDirectives(t *testing.T) {
	 465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
	 466  
	 467  	for i, want := range nestedDirectivesTokens {
	 468  		have, err := d.Token()
	 469  		if err != nil {
	 470  			t.Fatalf("token %d: unexpected error: %s", i, err)
	 471  		}
	 472  		if !reflect.DeepEqual(have, want) {
	 473  			t.Errorf("token %d = %#v want %#v", i, have, want)
	 474  		}
	 475  	}
	 476  }
	 477  
	 478  func TestToken(t *testing.T) {
	 479  	d := NewDecoder(strings.NewReader(testInput))
	 480  	d.Entity = testEntity
	 481  
	 482  	for i, want := range cookedTokens {
	 483  		have, err := d.Token()
	 484  		if err != nil {
	 485  			t.Fatalf("token %d: unexpected error: %s", i, err)
	 486  		}
	 487  		if !reflect.DeepEqual(have, want) {
	 488  			t.Errorf("token %d = %#v want %#v", i, have, want)
	 489  		}
	 490  	}
	 491  }
	 492  
	 493  func TestSyntax(t *testing.T) {
	 494  	for i := range xmlInput {
	 495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
	 496  		var err error
	 497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
	 498  		}
	 499  		if _, ok := err.(*SyntaxError); !ok {
	 500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
	 501  		}
	 502  	}
	 503  }
	 504  
	 505  type allScalars struct {
	 506  	True1		 bool
	 507  	True2		 bool
	 508  	False1		bool
	 509  	False2		bool
	 510  	Int			 int
	 511  	Int8			int8
	 512  	Int16		 int16
	 513  	Int32		 int32
	 514  	Int64		 int64
	 515  	Uint			int
	 516  	Uint8		 uint8
	 517  	Uint16		uint16
	 518  	Uint32		uint32
	 519  	Uint64		uint64
	 520  	Uintptr	 uintptr
	 521  	Float32	 float32
	 522  	Float64	 float64
	 523  	String		string
	 524  	PtrString *string
	 525  }
	 526  
// all is the value TestAllScalars expects after unmarshalling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen backs all.PtrString, which needs an addressable string.
var sixteen = "16"
	 550  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. Booleans are written both as words and as 1/0. The <Float>
// element has no counterpart among the allScalars fields visible in this
// file.
// NOTE(review): presumably <Float> is there to check that unmatched
// elements are skipped — confirm.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
	 573  
	 574  func TestAllScalars(t *testing.T) {
	 575  	var a allScalars
	 576  	err := Unmarshal([]byte(testScalarsInput), &a)
	 577  
	 578  	if err != nil {
	 579  		t.Fatal(err)
	 580  	}
	 581  	if !reflect.DeepEqual(a, all) {
	 582  		t.Errorf("have %+v want %+v", a, all)
	 583  	}
	 584  }
	 585  
	 586  type item struct {
	 587  	FieldA string
	 588  }
	 589  
	 590  func TestIssue569(t *testing.T) {
	 591  	data := `<item><FieldA>abcd</FieldA></item>`
	 592  	var i item
	 593  	err := Unmarshal([]byte(data), &i)
	 594  
	 595  	if err != nil || i.FieldA != "abcd" {
	 596  		t.Fatal("Expecting abcd")
	 597  	}
	 598  }
	 599  
	 600  func TestUnquotedAttrs(t *testing.T) {
	 601  	data := "<tag attr=azAZ09:-_\t>"
	 602  	d := NewDecoder(strings.NewReader(data))
	 603  	d.Strict = false
	 604  	token, err := d.Token()
	 605  	if _, ok := err.(*SyntaxError); ok {
	 606  		t.Errorf("Unexpected error: %v", err)
	 607  	}
	 608  	if token.(StartElement).Name.Local != "tag" {
	 609  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
	 610  	}
	 611  	attr := token.(StartElement).Attr[0]
	 612  	if attr.Value != "azAZ09:-_" {
	 613  		t.Errorf("Unexpected attribute value: %v", attr.Value)
	 614  	}
	 615  	if attr.Name.Local != "attr" {
	 616  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
	 617  	}
	 618  }
	 619  
	 620  func TestValuelessAttrs(t *testing.T) {
	 621  	tests := [][3]string{
	 622  		{"<p nowrap>", "p", "nowrap"},
	 623  		{"<p nowrap >", "p", "nowrap"},
	 624  		{"<input checked/>", "input", "checked"},
	 625  		{"<input checked />", "input", "checked"},
	 626  	}
	 627  	for _, test := range tests {
	 628  		d := NewDecoder(strings.NewReader(test[0]))
	 629  		d.Strict = false
	 630  		token, err := d.Token()
	 631  		if _, ok := err.(*SyntaxError); ok {
	 632  			t.Errorf("Unexpected error: %v", err)
	 633  		}
	 634  		if token.(StartElement).Name.Local != test[1] {
	 635  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
	 636  		}
	 637  		attr := token.(StartElement).Attr[0]
	 638  		if attr.Value != test[2] {
	 639  			t.Errorf("Unexpected attribute value: %v", attr.Value)
	 640  		}
	 641  		if attr.Name.Local != test[2] {
	 642  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
	 643  		}
	 644  	}
	 645  }
	 646  
	 647  func TestCopyTokenCharData(t *testing.T) {
	 648  	data := []byte("same data")
	 649  	var tok1 Token = CharData(data)
	 650  	tok2 := CopyToken(tok1)
	 651  	if !reflect.DeepEqual(tok1, tok2) {
	 652  		t.Error("CopyToken(CharData) != CharData")
	 653  	}
	 654  	data[1] = 'o'
	 655  	if reflect.DeepEqual(tok1, tok2) {
	 656  		t.Error("CopyToken(CharData) uses same buffer.")
	 657  	}
	 658  }
	 659  
	 660  func TestCopyTokenStartElement(t *testing.T) {
	 661  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
	 662  	var tok1 Token = elt
	 663  	tok2 := CopyToken(tok1)
	 664  	if tok1.(StartElement).Attr[0].Value != "en" {
	 665  		t.Error("CopyToken overwrote Attr[0]")
	 666  	}
	 667  	if !reflect.DeepEqual(tok1, tok2) {
	 668  		t.Error("CopyToken(StartElement) != StartElement")
	 669  	}
	 670  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
	 671  	if reflect.DeepEqual(tok1, tok2) {
	 672  		t.Error("CopyToken(CharData) uses same buffer.")
	 673  	}
	 674  }
	 675  
	 676  func TestSyntaxErrorLineNum(t *testing.T) {
	 677  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
	 678  	d := NewDecoder(strings.NewReader(testInput))
	 679  	var err error
	 680  	for _, err = d.Token(); err == nil; _, err = d.Token() {
	 681  	}
	 682  	synerr, ok := err.(*SyntaxError)
	 683  	if !ok {
	 684  		t.Error("Expected SyntaxError.")
	 685  	}
	 686  	if synerr.Line != 3 {
	 687  		t.Error("SyntaxError didn't have correct line number.")
	 688  	}
	 689  }
	 690  
	 691  func TestTrailingRawToken(t *testing.T) {
	 692  	input := `<FOO></FOO>	`
	 693  	d := NewDecoder(strings.NewReader(input))
	 694  	var err error
	 695  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
	 696  	}
	 697  	if err != io.EOF {
	 698  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
	 699  	}
	 700  }
	 701  
	 702  func TestTrailingToken(t *testing.T) {
	 703  	input := `<FOO></FOO>	`
	 704  	d := NewDecoder(strings.NewReader(input))
	 705  	var err error
	 706  	for _, err = d.Token(); err == nil; _, err = d.Token() {
	 707  	}
	 708  	if err != io.EOF {
	 709  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
	 710  	}
	 711  }
	 712  
	 713  func TestEntityInsideCDATA(t *testing.T) {
	 714  	input := `<test><![CDATA[ &val=foo ]]></test>`
	 715  	d := NewDecoder(strings.NewReader(input))
	 716  	var err error
	 717  	for _, err = d.Token(); err == nil; _, err = d.Token() {
	 718  	}
	 719  	if err != io.EOF {
	 720  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
	 721  	}
	 722  }
	 723  
// characterTests pairs inputs containing disallowed characters or invalid
// entity references (in) with the exact SyntaxError.Msg that
// TestDisallowedCharacters expects for them (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
	 738  
	 739  func TestDisallowedCharacters(t *testing.T) {
	 740  
	 741  	for i, tt := range characterTests {
	 742  		d := NewDecoder(strings.NewReader(tt.in))
	 743  		var err error
	 744  
	 745  		for err == nil {
	 746  			_, err = d.Token()
	 747  		}
	 748  		synerr, ok := err.(*SyntaxError)
	 749  		if !ok {
	 750  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
	 751  		}
	 752  		if synerr.Msg != tt.err {
	 753  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
	 754  		}
	 755  	}
	 756  }
	 757  
	 758  func TestIsInCharacterRange(t *testing.T) {
	 759  	invalid := []rune{
	 760  		utf8.MaxRune + 1,
	 761  		0xD800, // surrogate min
	 762  		0xDFFF, // surrogate max
	 763  		-1,
	 764  	}
	 765  	for _, r := range invalid {
	 766  		if isInCharacterRange(r) {
	 767  			t.Errorf("rune %U considered valid", r)
	 768  		}
	 769  	}
	 770  }
	 771  
// procInstTests pairs the body of an XML declaration (input) with the values
// TestProcInstEncoding expects procInst to extract from it: expect[0] for
// "version" and expect[1] for "encoding". An empty string is expected where
// the parameter is absent or, as in the fourth row, its value is unquoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
	 782  
	 783  func TestProcInstEncoding(t *testing.T) {
	 784  	for _, test := range procInstTests {
	 785  		if got := procInst("version", test.input); got != test.expect[0] {
	 786  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
	 787  		}
	 788  		if got := procInst("encoding", test.input); got != test.expect[1] {
	 789  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
	 790  		}
	 791  	}
	 792  }
	 793  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput holds three DOCTYPE directives containing
// comments: before a nested declaration, after one, and, in the third line,
// mixed with sequences such as "<!->" and "<!>" that start like a comment
// but are not one.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
	 802  
	 803  var directivesWithCommentsTokens = []Token{
	 804  	CharData("\n"),
	 805  	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	 806  	CharData("\n"),
	 807  	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	 808  	CharData("\n"),
	 809  	Directive(`DOCTYPE <!-> <!>			 [<!ENTITY go "Golang"> ]`),
	 810  	CharData("\n"),
	 811  }
	 812  
	 813  func TestDirectivesWithComments(t *testing.T) {
	 814  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
	 815  
	 816  	for i, want := range directivesWithCommentsTokens {
	 817  		have, err := d.Token()
	 818  		if err != nil {
	 819  			t.Fatalf("token %d: unexpected error: %s", i, err)
	 820  		}
	 821  		if !reflect.DeepEqual(have, want) {
	 822  			t.Errorf("token %d = %#v want %#v", i, have, want)
	 823  		}
	 824  	}
	 825  }
	 826  
	 827  // Writer whose Write method always returns an error.
	 828  type errWriter struct{}
	 829  
	 830  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
	 831  
	 832  func TestEscapeTextIOErrors(t *testing.T) {
	 833  	expectErr := "unwritable"
	 834  	err := EscapeText(errWriter{}, []byte{'A'})
	 835  
	 836  	if err == nil || err.Error() != expectErr {
	 837  		t.Errorf("have %v, want %v", err, expectErr)
	 838  	}
	 839  }
	 840  
	 841  func TestEscapeTextInvalidChar(t *testing.T) {
	 842  	input := []byte("A \x00 terminated string.")
	 843  	expected := "A \uFFFD terminated string."
	 844  
	 845  	buff := new(bytes.Buffer)
	 846  	if err := EscapeText(buff, input); err != nil {
	 847  		t.Fatalf("have %v, want nil", err)
	 848  	}
	 849  	text := buff.String()
	 850  
	 851  	if text != expected {
	 852  		t.Errorf("have %v, want %v", text, expected)
	 853  	}
	 854  }
	 855  
	 856  func TestIssue5880(t *testing.T) {
	 857  	type T []byte
	 858  	data, err := Marshal(T{192, 168, 0, 1})
	 859  	if err != nil {
	 860  		t.Errorf("Marshal error: %v", err)
	 861  	}
	 862  	if !utf8.Valid(data) {
	 863  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
	 864  	}
	 865  }
	 866  
	 867  func TestIssue11405(t *testing.T) {
	 868  	testCases := []string{
	 869  		"<root>",
	 870  		"<root><foo>",
	 871  		"<root><foo></foo>",
	 872  	}
	 873  	for _, tc := range testCases {
	 874  		d := NewDecoder(strings.NewReader(tc))
	 875  		var err error
	 876  		for {
	 877  			_, err = d.Token()
	 878  			if err != nil {
	 879  				break
	 880  			}
	 881  		}
	 882  		if _, ok := err.(*SyntaxError); !ok {
	 883  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
	 884  		}
	 885  	}
	 886  }
	 887  
	 888  func TestIssue12417(t *testing.T) {
	 889  	testCases := []struct {
	 890  		s	string
	 891  		ok bool
	 892  	}{
	 893  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
	 894  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
	 895  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
	 896  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
	 897  	}
	 898  	for _, tc := range testCases {
	 899  		d := NewDecoder(strings.NewReader(tc.s))
	 900  		var err error
	 901  		for {
	 902  			_, err = d.Token()
	 903  			if err != nil {
	 904  				if err == io.EOF {
	 905  					err = nil
	 906  				}
	 907  				break
	 908  			}
	 909  		}
	 910  		if err != nil && tc.ok {
	 911  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
	 912  			continue
	 913  		}
	 914  		if err == nil && !tc.ok {
	 915  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
	 916  		}
	 917  	}
	 918  }
	 919  
	 920  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	 921  	return func(src TokenReader) TokenReader {
	 922  		return mapper{
	 923  			t: src,
	 924  			f: mapping,
	 925  		}
	 926  	}
	 927  }
	 928  
	 929  type mapper struct {
	 930  	t TokenReader
	 931  	f func(Token) Token
	 932  }
	 933  
	 934  func (m mapper) Token() (Token, error) {
	 935  	tok, err := m.t.Token()
	 936  	if err != nil {
	 937  		return nil, err
	 938  	}
	 939  	return m.f(tok), nil
	 940  }
	 941  
	 942  func TestNewTokenDecoderIdempotent(t *testing.T) {
	 943  	d := NewDecoder(strings.NewReader(`<br>`))
	 944  	d2 := NewTokenDecoder(d)
	 945  	if d != d2 {
	 946  		t.Error("NewTokenDecoder did not detect underlying Decoder")
	 947  	}
	 948  }
	 949  
	 950  func TestWrapDecoder(t *testing.T) {
	 951  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
	 952  	m := tokenMap(func(t Token) Token {
	 953  		switch tok := t.(type) {
	 954  		case StartElement:
	 955  			if tok.Name.Local == "quote" {
	 956  				tok.Name.Local = "blocking"
	 957  				return tok
	 958  			}
	 959  		case EndElement:
	 960  			if tok.Name.Local == "quote" {
	 961  				tok.Name.Local = "blocking"
	 962  				return tok
	 963  			}
	 964  		}
	 965  		return t
	 966  	})
	 967  
	 968  	d = NewTokenDecoder(m(d))
	 969  
	 970  	o := struct {
	 971  		XMLName	Name	 `xml:"blocking"`
	 972  		Chardata string `xml:",chardata"`
	 973  	}{}
	 974  
	 975  	if err := d.Decode(&o); err != nil {
	 976  		t.Fatal("Got unexpected error while decoding:", err)
	 977  	}
	 978  
	 979  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
	 980  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
	 981  	}
	 982  }
	 983  
	 984  type tokReader struct{}
	 985  
	 986  func (tokReader) Token() (Token, error) {
	 987  	return StartElement{}, nil
	 988  }
	 989  
	 990  type Failure struct{}
	 991  
	 992  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	 993  	return nil
	 994  }
	 995  
	 996  func TestTokenUnmarshaler(t *testing.T) {
	 997  	defer func() {
	 998  		if r := recover(); r != nil {
	 999  			t.Error("Unexpected panic using custom token unmarshaler")
	1000  		}
	1001  	}()
	1002  
	1003  	d := NewTokenDecoder(tokReader{})
	1004  	d.Decode(&Failure{})
	1005  }
	1006  
	1007  func testRoundTrip(t *testing.T, input string) {
	1008  	d := NewDecoder(strings.NewReader(input))
	1009  	var tokens []Token
	1010  	var buf bytes.Buffer
	1011  	e := NewEncoder(&buf)
	1012  	for {
	1013  		tok, err := d.Token()
	1014  		if err == io.EOF {
	1015  			break
	1016  		}
	1017  		if err != nil {
	1018  			t.Fatalf("invalid input: %v", err)
	1019  		}
	1020  		if err := e.EncodeToken(tok); err != nil {
	1021  			t.Fatalf("failed to re-encode input: %v", err)
	1022  		}
	1023  		tokens = append(tokens, CopyToken(tok))
	1024  	}
	1025  	if err := e.Flush(); err != nil {
	1026  		t.Fatal(err)
	1027  	}
	1028  
	1029  	d = NewDecoder(&buf)
	1030  	for {
	1031  		tok, err := d.Token()
	1032  		if err == io.EOF {
	1033  			break
	1034  		}
	1035  		if err != nil {
	1036  			t.Fatalf("failed to decode output: %v", err)
	1037  		}
	1038  		if len(tokens) == 0 {
	1039  			t.Fatalf("unexpected token: %#v", tok)
	1040  		}
	1041  		a, b := tokens[0], tok
	1042  		if !reflect.DeepEqual(a, b) {
	1043  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
	1044  		}
	1045  		tokens = tokens[1:]
	1046  	}
	1047  	if len(tokens) > 0 {
	1048  		t.Fatalf("lost tokens: %#v", tokens)
	1049  	}
	1050  }
	1051  
	1052  func TestRoundTrip(t *testing.T) {
	1053  	tests := map[string]string{
	1054  		"leading colon":					`<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
	1055  		"trailing colon":				 `<foo abc:="x"></foo>`,
	1056  		"double colon":					 `<x:y:foo></x:y:foo>`,
	1057  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
	1058  	}
	1059  	for name, input := range tests {
	1060  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
	1061  	}
	1062  }
	1063  

View as plain text