1
2
3
4
5 package xml
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "reflect"
12 "strings"
13 "testing"
14 "unicode/utf8"
15 )
16
17 type toks struct {
18 earlyEOF bool
19 t []Token
20 }
21
22 func (t *toks) Token() (Token, error) {
23 if len(t.t) == 0 {
24 return nil, io.EOF
25 }
26 var tok Token
27 tok, t.t = t.t[0], t.t[1:]
28 if t.earlyEOF && len(t.t) == 0 {
29 return tok, io.EOF
30 }
31 return tok, nil
32 }
33
34 func TestDecodeEOF(t *testing.T) {
35 start := StartElement{Name: Name{Local: "test"}}
36 tests := []struct {
37 name string
38 tokens []Token
39 ok bool
40 }{
41 {
42 name: "OK",
43 tokens: []Token{
44 start,
45 start.End(),
46 },
47 ok: true,
48 },
49 {
50 name: "Malformed",
51 tokens: []Token{
52 start,
53 StartElement{Name: Name{Local: "bad"}},
54 start.End(),
55 },
56 ok: false,
57 },
58 }
59 for _, tc := range tests {
60 for _, eof := range []bool{true, false} {
61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
62 t.Run(name, func(t *testing.T) {
63 d := NewTokenDecoder(&toks{
64 earlyEOF: eof,
65 t: tc.tokens,
66 })
67 err := d.Decode(&struct {
68 XMLName Name `xml:"test"`
69 }{})
70 if tc.ok && err != nil {
71 t.Fatalf("d.Decode: expected nil error, got %v", err)
72 }
73 if _, ok := err.(*SyntaxError); !tc.ok && !ok {
74 t.Errorf("d.Decode: expected syntax error, got %v", err)
75 }
76 })
77 }
78 }
79 }
80
81 type toksNil struct {
82 returnEOF bool
83 t []Token
84 }
85
86 func (t *toksNil) Token() (Token, error) {
87 if len(t.t) == 0 {
88 if !t.returnEOF {
89
90
91 t.returnEOF = true
92 return nil, nil
93 }
94 return nil, io.EOF
95 }
96 var tok Token
97 tok, t.t = t.t[0], t.t[1:]
98 return tok, nil
99 }
100
101 func TestDecodeNilToken(t *testing.T) {
102 for _, strict := range []bool{true, false} {
103 name := fmt.Sprintf("Strict=%v", strict)
104 t.Run(name, func(t *testing.T) {
105 start := StartElement{Name: Name{Local: "test"}}
106 bad := StartElement{Name: Name{Local: "bad"}}
107 d := NewTokenDecoder(&toksNil{
108
109 t: []Token{start, bad, start.End()},
110 })
111 d.Strict = strict
112 err := d.Decode(&struct {
113 XMLName Name `xml:"test"`
114 }{})
115 if _, ok := err.(*SyntaxError); !ok {
116 t.Errorf("d.Decode: expected syntax error, got %v", err)
117 }
118 })
119 }
120 }
121
122 const testInput = `
123 <?xml version="1.0" encoding="UTF-8"?>
124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
127 "\r\n\t" + ` >
128 <hello lang="en">World <>'" 白鵬翔</hello>
129 <query>&何; &is-it;</query>
130 <goodbye />
131 <outer foo:attr="value" xmlns:tag="ns4">
132 <inner/>
133 </outer>
134 <tag:name>
135 <![CDATA[Some text here.]]>
136 </tag:name>
137 </body><!-- missing final newline -->`
138
139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
140
141 var rawTokens = []Token{
142 CharData("\n"),
143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
144 CharData("\n"),
145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
147 CharData("\n"),
148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
149 CharData("\n "),
150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
151 CharData("World <>'\" 白鵬翔"),
152 EndElement{Name{"", "hello"}},
153 CharData("\n "),
154 StartElement{Name{"", "query"}, []Attr{}},
155 CharData("What is it?"),
156 EndElement{Name{"", "query"}},
157 CharData("\n "),
158 StartElement{Name{"", "goodbye"}, []Attr{}},
159 EndElement{Name{"", "goodbye"}},
160 CharData("\n "),
161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
162 CharData("\n "),
163 StartElement{Name{"", "inner"}, []Attr{}},
164 EndElement{Name{"", "inner"}},
165 CharData("\n "),
166 EndElement{Name{"", "outer"}},
167 CharData("\n "),
168 StartElement{Name{"tag", "name"}, []Attr{}},
169 CharData("\n "),
170 CharData("Some text here."),
171 CharData("\n "),
172 EndElement{Name{"tag", "name"}},
173 CharData("\n"),
174 EndElement{Name{"", "body"}},
175 Comment(" missing final newline "),
176 }
177
178 var cookedTokens = []Token{
179 CharData("\n"),
180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
181 CharData("\n"),
182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
184 CharData("\n"),
185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
186 CharData("\n "),
187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
188 CharData("World <>'\" 白鵬翔"),
189 EndElement{Name{"ns2", "hello"}},
190 CharData("\n "),
191 StartElement{Name{"ns2", "query"}, []Attr{}},
192 CharData("What is it?"),
193 EndElement{Name{"ns2", "query"}},
194 CharData("\n "),
195 StartElement{Name{"ns2", "goodbye"}, []Attr{}},
196 EndElement{Name{"ns2", "goodbye"}},
197 CharData("\n "),
198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
199 CharData("\n "),
200 StartElement{Name{"ns2", "inner"}, []Attr{}},
201 EndElement{Name{"ns2", "inner"}},
202 CharData("\n "),
203 EndElement{Name{"ns2", "outer"}},
204 CharData("\n "),
205 StartElement{Name{"ns3", "name"}, []Attr{}},
206 CharData("\n "),
207 CharData("Some text here."),
208 CharData("\n "),
209 EndElement{Name{"ns3", "name"}},
210 CharData("\n"),
211 EndElement{Name{"ns2", "body"}},
212 Comment(" missing final newline "),
213 }
214
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; it is used by the alternate
// encoding tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
218
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding; the element name and text appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
227
// xmlInput lists documents that TestSyntax feeds to a Decoder. For every
// entry, reading tokens is expected to end in a *SyntaxError.
var xmlInput = []string{
	// Inputs that stop in the middle of a construct.
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// Complete but malformed inputs.
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",

	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	// Reference to an undefined entity. The text must stay escaped: a
	// literal " c;" would be ordinary character data in a well-formed
	// document and could never produce a syntax error.
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",

	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
274
275 func TestRawToken(t *testing.T) {
276 d := NewDecoder(strings.NewReader(testInput))
277 d.Entity = testEntity
278 testRawToken(t, d, testInput, rawTokens)
279 }
280
281 const nonStrictInput = `
282 <tag>non&entity</tag>
283 <tag>&unknown;entity</tag>
284 <tag>{</tag>
285 <tag>&#zzz;</tag>
286 <tag>&なまえ3;</tag>
287 <tag><-gt;</tag>
288 <tag>&;</tag>
289 <tag>&0a;</tag>
290 `
291
292 var nonStrictTokens = []Token{
293 CharData("\n"),
294 StartElement{Name{"", "tag"}, []Attr{}},
295 CharData("non&entity"),
296 EndElement{Name{"", "tag"}},
297 CharData("\n"),
298 StartElement{Name{"", "tag"}, []Attr{}},
299 CharData("&unknown;entity"),
300 EndElement{Name{"", "tag"}},
301 CharData("\n"),
302 StartElement{Name{"", "tag"}, []Attr{}},
303 CharData("{"),
304 EndElement{Name{"", "tag"}},
305 CharData("\n"),
306 StartElement{Name{"", "tag"}, []Attr{}},
307 CharData("&#zzz;"),
308 EndElement{Name{"", "tag"}},
309 CharData("\n"),
310 StartElement{Name{"", "tag"}, []Attr{}},
311 CharData("&なまえ3;"),
312 EndElement{Name{"", "tag"}},
313 CharData("\n"),
314 StartElement{Name{"", "tag"}, []Attr{}},
315 CharData("<-gt;"),
316 EndElement{Name{"", "tag"}},
317 CharData("\n"),
318 StartElement{Name{"", "tag"}, []Attr{}},
319 CharData("&;"),
320 EndElement{Name{"", "tag"}},
321 CharData("\n"),
322 StartElement{Name{"", "tag"}, []Attr{}},
323 CharData("&0a;"),
324 EndElement{Name{"", "tag"}},
325 CharData("\n"),
326 }
327
328 func TestNonStrictRawToken(t *testing.T) {
329 d := NewDecoder(strings.NewReader(nonStrictInput))
330 d.Strict = false
331 testRawToken(t, d, nonStrictInput, nonStrictTokens)
332 }
333
// downCaser wraps an io.ByteReader and lower-cases the ASCII upper-case
// letters it reads. It only supports byte-at-a-time reading.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte reads one byte from the wrapped reader, mapping 'A'..'Z' to
// 'a'..'z' and passing every other byte (and the error) through unchanged.
func (d *downCaser) ReadByte() (c byte, err error) {
	c, err = d.r.ReadByte()
	if 'A' <= c && c <= 'Z' {
		c = c - 'A' + 'a'
	}
	return c, err
}

// Read is not expected to be called; it fails the test if it is.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
351
352 func TestRawTokenAltEncoding(t *testing.T) {
353 d := NewDecoder(strings.NewReader(testInputAltEncoding))
354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
355 if charset != "x-testing-uppercase" {
356 t.Fatalf("unexpected charset %q", charset)
357 }
358 return &downCaser{t, input.(io.ByteReader)}, nil
359 }
360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
361 }
362
363 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
364 d := NewDecoder(strings.NewReader(testInputAltEncoding))
365 token, err := d.RawToken()
366 if token == nil {
367 t.Fatalf("expected a token on first RawToken call")
368 }
369 if err != nil {
370 t.Fatal(err)
371 }
372 token, err = d.RawToken()
373 if token != nil {
374 t.Errorf("expected a nil token; got %#v", token)
375 }
376 if err == nil {
377 t.Fatalf("expected an error on second RawToken call")
378 }
379 const encoding = "x-testing-uppercase"
380 if !strings.Contains(err.Error(), encoding) {
381 t.Errorf("expected error to contain %q; got error: %v",
382 encoding, err)
383 }
384 }
385
386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
387 lastEnd := int64(0)
388 for i, want := range rawTokens {
389 start := d.InputOffset()
390 have, err := d.RawToken()
391 end := d.InputOffset()
392 if err != nil {
393 t.Fatalf("token %d: unexpected error: %s", i, err)
394 }
395 if !reflect.DeepEqual(have, want) {
396 var shave, swant string
397 if _, ok := have.(CharData); ok {
398 shave = fmt.Sprintf("CharData(%q)", have)
399 } else {
400 shave = fmt.Sprintf("%#v", have)
401 }
402 if _, ok := want.(CharData); ok {
403 swant = fmt.Sprintf("CharData(%q)", want)
404 } else {
405 swant = fmt.Sprintf("%#v", want)
406 }
407 t.Errorf("token %d = %s, want %s", i, shave, swant)
408 }
409
410
411 switch {
412 case start < lastEnd:
413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
414 case start >= end:
415
416 if start == end && end == lastEnd {
417 break
418 }
419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
420 case end > int64(len(raw)):
421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
422 default:
423 text := raw[start:end]
424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
426 }
427 }
428 lastEnd = end
429 }
430 }
431
432
433
434
435
// nestedDirectivesInput contains DOCTYPE directives with a nested <!ENTITY>
// declaration whose quoted value holds angle brackets and quote characters.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
445
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: each line becomes one Directive holding the
// text between "<!" and the closing ">", separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
463
464 func TestNestedDirectives(t *testing.T) {
465 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
466
467 for i, want := range nestedDirectivesTokens {
468 have, err := d.Token()
469 if err != nil {
470 t.Fatalf("token %d: unexpected error: %s", i, err)
471 }
472 if !reflect.DeepEqual(have, want) {
473 t.Errorf("token %d = %#v want %#v", i, have, want)
474 }
475 }
476 }
477
478 func TestToken(t *testing.T) {
479 d := NewDecoder(strings.NewReader(testInput))
480 d.Entity = testEntity
481
482 for i, want := range cookedTokens {
483 have, err := d.Token()
484 if err != nil {
485 t.Fatalf("token %d: unexpected error: %s", i, err)
486 }
487 if !reflect.DeepEqual(have, want) {
488 t.Errorf("token %d = %#v want %#v", i, have, want)
489 }
490 }
491 }
492
493 func TestSyntax(t *testing.T) {
494 for i := range xmlInput {
495 d := NewDecoder(strings.NewReader(xmlInput[i]))
496 var err error
497 for _, err = d.Token(); err == nil; _, err = d.Token() {
498 }
499 if _, ok := err.(*SyntaxError); !ok {
500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
501 }
502 }
503 }
504
// allScalars has one field per scalar kind exercised by TestAllScalars.
// Each field name matches an element name in testScalarsInput.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint is unsigned so that decoding into uint is covered; declared as
	// int it would only repeat the Int case.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
526
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
550
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. It contains a <Float> element for which allScalars
// declares no field.
const testScalarsInput = `<allscalars>
<True1>true</True1>
<True2>1</True2>
<False1>false</False1>
<False2>0</False2>
<Int>1</Int>
<Int8>-2</Int8>
<Int16>3</Int16>
<Int32>-4</Int32>
<Int64>5</Int64>
<Uint>6</Uint>
<Uint8>7</Uint8>
<Uint16>8</Uint16>
<Uint32>9</Uint32>
<Uint64>10</Uint64>
<Uintptr>11</Uintptr>
<Float>12.0</Float>
<Float32>13.0</Float32>
<Float64>14.0</Float64>
<String>15</String>
<PtrString>16</PtrString>
</allscalars>`
573
574 func TestAllScalars(t *testing.T) {
575 var a allScalars
576 err := Unmarshal([]byte(testScalarsInput), &a)
577
578 if err != nil {
579 t.Fatal(err)
580 }
581 if !reflect.DeepEqual(a, all) {
582 t.Errorf("have %+v want %+v", a, all)
583 }
584 }
585
586 type item struct {
587 FieldA string
588 }
589
590 func TestIssue569(t *testing.T) {
591 data := `<item><FieldA>abcd</FieldA></item>`
592 var i item
593 err := Unmarshal([]byte(data), &i)
594
595 if err != nil || i.FieldA != "abcd" {
596 t.Fatal("Expecting abcd")
597 }
598 }
599
600 func TestUnquotedAttrs(t *testing.T) {
601 data := "<tag attr=azAZ09:-_\t>"
602 d := NewDecoder(strings.NewReader(data))
603 d.Strict = false
604 token, err := d.Token()
605 if _, ok := err.(*SyntaxError); ok {
606 t.Errorf("Unexpected error: %v", err)
607 }
608 if token.(StartElement).Name.Local != "tag" {
609 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
610 }
611 attr := token.(StartElement).Attr[0]
612 if attr.Value != "azAZ09:-_" {
613 t.Errorf("Unexpected attribute value: %v", attr.Value)
614 }
615 if attr.Name.Local != "attr" {
616 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
617 }
618 }
619
620 func TestValuelessAttrs(t *testing.T) {
621 tests := [][3]string{
622 {"<p nowrap>", "p", "nowrap"},
623 {"<p nowrap >", "p", "nowrap"},
624 {"<input checked/>", "input", "checked"},
625 {"<input checked />", "input", "checked"},
626 }
627 for _, test := range tests {
628 d := NewDecoder(strings.NewReader(test[0]))
629 d.Strict = false
630 token, err := d.Token()
631 if _, ok := err.(*SyntaxError); ok {
632 t.Errorf("Unexpected error: %v", err)
633 }
634 if token.(StartElement).Name.Local != test[1] {
635 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
636 }
637 attr := token.(StartElement).Attr[0]
638 if attr.Value != test[2] {
639 t.Errorf("Unexpected attribute value: %v", attr.Value)
640 }
641 if attr.Name.Local != test[2] {
642 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
643 }
644 }
645 }
646
647 func TestCopyTokenCharData(t *testing.T) {
648 data := []byte("same data")
649 var tok1 Token = CharData(data)
650 tok2 := CopyToken(tok1)
651 if !reflect.DeepEqual(tok1, tok2) {
652 t.Error("CopyToken(CharData) != CharData")
653 }
654 data[1] = 'o'
655 if reflect.DeepEqual(tok1, tok2) {
656 t.Error("CopyToken(CharData) uses same buffer.")
657 }
658 }
659
660 func TestCopyTokenStartElement(t *testing.T) {
661 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
662 var tok1 Token = elt
663 tok2 := CopyToken(tok1)
664 if tok1.(StartElement).Attr[0].Value != "en" {
665 t.Error("CopyToken overwrote Attr[0]")
666 }
667 if !reflect.DeepEqual(tok1, tok2) {
668 t.Error("CopyToken(StartElement) != StartElement")
669 }
670 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
671 if reflect.DeepEqual(tok1, tok2) {
672 t.Error("CopyToken(CharData) uses same buffer.")
673 }
674 }
675
676 func TestSyntaxErrorLineNum(t *testing.T) {
677 testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
678 d := NewDecoder(strings.NewReader(testInput))
679 var err error
680 for _, err = d.Token(); err == nil; _, err = d.Token() {
681 }
682 synerr, ok := err.(*SyntaxError)
683 if !ok {
684 t.Error("Expected SyntaxError.")
685 }
686 if synerr.Line != 3 {
687 t.Error("SyntaxError didn't have correct line number.")
688 }
689 }
690
691 func TestTrailingRawToken(t *testing.T) {
692 input := `<FOO></FOO> `
693 d := NewDecoder(strings.NewReader(input))
694 var err error
695 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
696 }
697 if err != io.EOF {
698 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
699 }
700 }
701
702 func TestTrailingToken(t *testing.T) {
703 input := `<FOO></FOO> `
704 d := NewDecoder(strings.NewReader(input))
705 var err error
706 for _, err = d.Token(); err == nil; _, err = d.Token() {
707 }
708 if err != io.EOF {
709 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
710 }
711 }
712
713 func TestEntityInsideCDATA(t *testing.T) {
714 input := `<test><![CDATA[ &val=foo ]]></test>`
715 d := NewDecoder(strings.NewReader(input))
716 var err error
717 for _, err = d.Token(); err == nil; _, err = d.Token() {
718 }
719 if err != io.EOF {
720 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
721 }
722 }
723
// characterTests pairs an input document with the SyntaxError message that
// TestDisallowedCharacters expects when reading it.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
738
739 func TestDisallowedCharacters(t *testing.T) {
740
741 for i, tt := range characterTests {
742 d := NewDecoder(strings.NewReader(tt.in))
743 var err error
744
745 for err == nil {
746 _, err = d.Token()
747 }
748 synerr, ok := err.(*SyntaxError)
749 if !ok {
750 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
751 }
752 if synerr.Msg != tt.err {
753 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
754 }
755 }
756 }
757
758 func TestIsInCharacterRange(t *testing.T) {
759 invalid := []rune{
760 utf8.MaxRune + 1,
761 0xD800,
762 0xDFFF,
763 -1,
764 }
765 for _, r := range invalid {
766 if isInCharacterRange(r) {
767 t.Errorf("rune %U considered valid", r)
768 }
769 }
770 }
771
// procInstTests pairs the body of an XML declaration with the values
// TestProcInstEncoding expects procInst to return for "version" (expect[0])
// and "encoding" (expect[1]).
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
782
783 func TestProcInstEncoding(t *testing.T) {
784 for _, test := range procInstTests {
785 if got := procInst("version", test.input); got != test.expect[0] {
786 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
787 }
788 if got := procInst("encoding", test.input); got != test.expect[1] {
789 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
790 }
791 }
792 }
793
794
795
796
// directivesWithCommentsInput contains DOCTYPE directives with comments
// embedded at various positions, including empty and oddly terminated ones.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
802
803 var directivesWithCommentsTokens = []Token{
804 CharData("\n"),
805 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
806 CharData("\n"),
807 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
808 CharData("\n"),
809 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
810 CharData("\n"),
811 }
812
813 func TestDirectivesWithComments(t *testing.T) {
814 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
815
816 for i, want := range directivesWithCommentsTokens {
817 have, err := d.Token()
818 if err != nil {
819 t.Fatalf("token %d: unexpected error: %s", i, err)
820 }
821 if !reflect.DeepEqual(have, want) {
822 t.Errorf("token %d = %#v want %#v", i, have, want)
823 }
824 }
825 }
826
827
// errWriter is a writer whose Write always fails.
type errWriter struct{}

// Write writes nothing and returns the error "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
831
832 func TestEscapeTextIOErrors(t *testing.T) {
833 expectErr := "unwritable"
834 err := EscapeText(errWriter{}, []byte{'A'})
835
836 if err == nil || err.Error() != expectErr {
837 t.Errorf("have %v, want %v", err, expectErr)
838 }
839 }
840
841 func TestEscapeTextInvalidChar(t *testing.T) {
842 input := []byte("A \x00 terminated string.")
843 expected := "A \uFFFD terminated string."
844
845 buff := new(bytes.Buffer)
846 if err := EscapeText(buff, input); err != nil {
847 t.Fatalf("have %v, want nil", err)
848 }
849 text := buff.String()
850
851 if text != expected {
852 t.Errorf("have %v, want %v", text, expected)
853 }
854 }
855
856 func TestIssue5880(t *testing.T) {
857 type T []byte
858 data, err := Marshal(T{192, 168, 0, 1})
859 if err != nil {
860 t.Errorf("Marshal error: %v", err)
861 }
862 if !utf8.Valid(data) {
863 t.Errorf("Marshal generated invalid UTF-8: %x", data)
864 }
865 }
866
867 func TestIssue11405(t *testing.T) {
868 testCases := []string{
869 "<root>",
870 "<root><foo>",
871 "<root><foo></foo>",
872 }
873 for _, tc := range testCases {
874 d := NewDecoder(strings.NewReader(tc))
875 var err error
876 for {
877 _, err = d.Token()
878 if err != nil {
879 break
880 }
881 }
882 if _, ok := err.(*SyntaxError); !ok {
883 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
884 }
885 }
886 }
887
888 func TestIssue12417(t *testing.T) {
889 testCases := []struct {
890 s string
891 ok bool
892 }{
893 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
894 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
895 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
896 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
897 }
898 for _, tc := range testCases {
899 d := NewDecoder(strings.NewReader(tc.s))
900 var err error
901 for {
902 _, err = d.Token()
903 if err != nil {
904 if err == io.EOF {
905 err = nil
906 }
907 break
908 }
909 }
910 if err != nil && tc.ok {
911 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
912 continue
913 }
914 if err == nil && !tc.ok {
915 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
916 }
917 }
918 }
919
920 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
921 return func(src TokenReader) TokenReader {
922 return mapper{
923 t: src,
924 f: mapping,
925 }
926 }
927 }
928
929 type mapper struct {
930 t TokenReader
931 f func(Token) Token
932 }
933
934 func (m mapper) Token() (Token, error) {
935 tok, err := m.t.Token()
936 if err != nil {
937 return nil, err
938 }
939 return m.f(tok), nil
940 }
941
942 func TestNewTokenDecoderIdempotent(t *testing.T) {
943 d := NewDecoder(strings.NewReader(`<br>`))
944 d2 := NewTokenDecoder(d)
945 if d != d2 {
946 t.Error("NewTokenDecoder did not detect underlying Decoder")
947 }
948 }
949
950 func TestWrapDecoder(t *testing.T) {
951 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
952 m := tokenMap(func(t Token) Token {
953 switch tok := t.(type) {
954 case StartElement:
955 if tok.Name.Local == "quote" {
956 tok.Name.Local = "blocking"
957 return tok
958 }
959 case EndElement:
960 if tok.Name.Local == "quote" {
961 tok.Name.Local = "blocking"
962 return tok
963 }
964 }
965 return t
966 })
967
968 d = NewTokenDecoder(m(d))
969
970 o := struct {
971 XMLName Name `xml:"blocking"`
972 Chardata string `xml:",chardata"`
973 }{}
974
975 if err := d.Decode(&o); err != nil {
976 t.Fatal("Got unexpected error while decoding:", err)
977 }
978
979 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
980 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
981 }
982 }
983
984 type tokReader struct{}
985
986 func (tokReader) Token() (Token, error) {
987 return StartElement{}, nil
988 }
989
990 type Failure struct{}
991
992 func (Failure) UnmarshalXML(*Decoder, StartElement) error {
993 return nil
994 }
995
996 func TestTokenUnmarshaler(t *testing.T) {
997 defer func() {
998 if r := recover(); r != nil {
999 t.Error("Unexpected panic using custom token unmarshaler")
1000 }
1001 }()
1002
1003 d := NewTokenDecoder(tokReader{})
1004 d.Decode(&Failure{})
1005 }
1006
1007 func testRoundTrip(t *testing.T, input string) {
1008 d := NewDecoder(strings.NewReader(input))
1009 var tokens []Token
1010 var buf bytes.Buffer
1011 e := NewEncoder(&buf)
1012 for {
1013 tok, err := d.Token()
1014 if err == io.EOF {
1015 break
1016 }
1017 if err != nil {
1018 t.Fatalf("invalid input: %v", err)
1019 }
1020 if err := e.EncodeToken(tok); err != nil {
1021 t.Fatalf("failed to re-encode input: %v", err)
1022 }
1023 tokens = append(tokens, CopyToken(tok))
1024 }
1025 if err := e.Flush(); err != nil {
1026 t.Fatal(err)
1027 }
1028
1029 d = NewDecoder(&buf)
1030 for {
1031 tok, err := d.Token()
1032 if err == io.EOF {
1033 break
1034 }
1035 if err != nil {
1036 t.Fatalf("failed to decode output: %v", err)
1037 }
1038 if len(tokens) == 0 {
1039 t.Fatalf("unexpected token: %#v", tok)
1040 }
1041 a, b := tokens[0], tok
1042 if !reflect.DeepEqual(a, b) {
1043 t.Fatalf("token mismatch: %#v vs %#v", a, b)
1044 }
1045 tokens = tokens[1:]
1046 }
1047 if len(tokens) > 0 {
1048 t.Fatalf("lost tokens: %#v", tokens)
1049 }
1050 }
1051
1052 func TestRoundTrip(t *testing.T) {
1053 tests := map[string]string{
1054 "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
1055 "trailing colon": `<foo abc:="x"></foo>`,
1056 "double colon": `<x:y:foo></x:y:foo>`,
1057 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
1058 }
1059 for name, input := range tests {
1060 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
1061 }
1062 }
1063
View as plain text