Source file
src/html/escape.go
Documentation: html
1
2
3
4
5
6 package html
7
8 import (
9 "strings"
10 "unicode/utf8"
11 )
12
13
14
15
// replacementTable holds the substitute code point for each numeric
// character reference in the range 0x80-0x9F. It is indexed by the
// referenced value minus 0x80, so entry 0 replaces 0x80 and entry 31
// replaces 0x9F. The values follow the Windows-1252 layout; positions that
// Windows-1252 leaves unassigned map to themselves.
var replacementTable = [...]rune{
	0x20AC, // 0x80
	0x0081, // 0x81 (unchanged)
	0x201A, // 0x82
	0x0192, // 0x83
	0x201E, // 0x84
	0x2026, // 0x85
	0x2020, // 0x86
	0x2021, // 0x87
	0x02C6, // 0x88
	0x2030, // 0x89
	0x0160, // 0x8A
	0x2039, // 0x8B
	0x0152, // 0x8C
	0x008D, // 0x8D (unchanged)
	0x017D, // 0x8E
	0x008F, // 0x8F (unchanged)
	0x0090, // 0x90 (unchanged)
	0x2018, // 0x91
	0x2019, // 0x92
	0x201C, // 0x93
	0x201D, // 0x94
	0x2022, // 0x95
	0x2013, // 0x96
	0x2014, // 0x97
	0x02DC, // 0x98
	0x2122, // 0x99
	0x0161, // 0x9A
	0x203A, // 0x9B
	0x0153, // 0x9C
	0x009D, // 0x9D (unchanged)
	0x017E, // 0x9E
	0x0178, // 0x9F
}
52
53
54
55
56 func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
57 const attribute = false
58
59
60
61
62 i, s := 1, b[src:]
63
64 if len(s) <= 1 {
65 b[dst] = b[src]
66 return dst + 1, src + 1
67 }
68
69 if s[i] == '#' {
70 if len(s) <= 3 {
71 b[dst] = b[src]
72 return dst + 1, src + 1
73 }
74 i++
75 c := s[i]
76 hex := false
77 if c == 'x' || c == 'X' {
78 hex = true
79 i++
80 }
81
82 x := '\x00'
83 for i < len(s) {
84 c = s[i]
85 i++
86 if hex {
87 if '0' <= c && c <= '9' {
88 x = 16*x + rune(c) - '0'
89 continue
90 } else if 'a' <= c && c <= 'f' {
91 x = 16*x + rune(c) - 'a' + 10
92 continue
93 } else if 'A' <= c && c <= 'F' {
94 x = 16*x + rune(c) - 'A' + 10
95 continue
96 }
97 } else if '0' <= c && c <= '9' {
98 x = 10*x + rune(c) - '0'
99 continue
100 }
101 if c != ';' {
102 i--
103 }
104 break
105 }
106
107 if i <= 3 {
108 b[dst] = b[src]
109 return dst + 1, src + 1
110 }
111
112 if 0x80 <= x && x <= 0x9F {
113
114 x = replacementTable[x-0x80]
115 } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
116
117 x = '\uFFFD'
118 }
119
120 return dst + utf8.EncodeRune(b[dst:], x), src + i
121 }
122
123
124
125
126 for i < len(s) {
127 c := s[i]
128 i++
129
130 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
131 continue
132 }
133 if c != ';' {
134 i--
135 }
136 break
137 }
138
139 entityName := s[1:i]
140 if len(entityName) == 0 {
141
142 } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
143
144 } else if x := entity[string(entityName)]; x != 0 {
145 return dst + utf8.EncodeRune(b[dst:], x), src + i
146 } else if x := entity2[string(entityName)]; x[0] != 0 {
147 dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
148 return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
149 } else if !attribute {
150 maxLen := len(entityName) - 1
151 if maxLen > longestEntityWithoutSemicolon {
152 maxLen = longestEntityWithoutSemicolon
153 }
154 for j := maxLen; j > 1; j-- {
155 if x := entity[string(entityName[:j])]; x != 0 {
156 return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
157 }
158 }
159 }
160
161 dst1, src1 = dst+i, src+i
162 copy(b[dst:dst1], b[src:src1])
163 return dst1, src1
164 }
165
// htmlEscaper replaces the five characters that are significant in HTML text
// and attribute values with character references. The replacement strings
// must be the escaped forms; mapping a character to itself would make
// EscapeString a no-op.
var htmlEscaper = strings.NewReplacer(
	`&`, "&amp;",
	`'`, "&#39;", // Numeric form: shorter than "&apos;", which older HTML versions lack.
	`<`, "&lt;",
	`>`, "&gt;",
	`"`, "&#34;", // Numeric form: shorter than "&quot;".
)
173
174
175
176
177
178 func EscapeString(s string) string {
179 return htmlEscaper.Replace(s)
180 }
181
182
183
184
185
186
187 func UnescapeString(s string) string {
188 populateMapsOnce.Do(populateMaps)
189 i := strings.IndexByte(s, '&')
190
191 if i < 0 {
192 return s
193 }
194
195 b := []byte(s)
196 dst, src := unescapeEntity(b, i, i)
197 for len(s[src:]) > 0 {
198 if s[src] == '&' {
199 i = 0
200 } else {
201 i = strings.IndexByte(s[src:], '&')
202 }
203 if i < 0 {
204 dst += copy(b[dst:], s[src:])
205 break
206 }
207
208 if i > 0 {
209 copy(b[dst:], s[src:src+i])
210 }
211 dst, src = unescapeEntity(b, dst+i, src+i)
212 }
213 return string(b[:dst])
214 }
215
View as plain text