1
2
3
4
5 package syntax
6
7 import (
8 "strconv"
9 "strings"
10 "unicode"
11 )
12
13
14
15
16
17 type Prog struct {
18 Inst []Inst
19 Start int
20 NumCap int
21 }
22
23
24 type InstOp uint8
25
26 const (
27 InstAlt InstOp = iota
28 InstAltMatch
29 InstCapture
30 InstEmptyWidth
31 InstMatch
32 InstFail
33 InstNop
34 InstRune
35 InstRune1
36 InstRuneAny
37 InstRuneAnyNotNL
38 )
39
40 var instOpNames = []string{
41 "InstAlt",
42 "InstAltMatch",
43 "InstCapture",
44 "InstEmptyWidth",
45 "InstMatch",
46 "InstFail",
47 "InstNop",
48 "InstRune",
49 "InstRune1",
50 "InstRuneAny",
51 "InstRuneAnyNotNL",
52 }
53
54 func (i InstOp) String() string {
55 if uint(i) >= uint(len(instOpNames)) {
56 return ""
57 }
58 return instOpNames[i]
59 }
60
61
62 type EmptyOp uint8
63
64 const (
65 EmptyBeginLine EmptyOp = 1 << iota
66 EmptyEndLine
67 EmptyBeginText
68 EmptyEndText
69 EmptyWordBoundary
70 EmptyNoWordBoundary
71 )
72
73
74
75
76
77
78
79 func EmptyOpContext(r1, r2 rune) EmptyOp {
80 var op EmptyOp = EmptyNoWordBoundary
81 var boundary byte
82 switch {
83 case IsWordChar(r1):
84 boundary = 1
85 case r1 == '\n':
86 op |= EmptyBeginLine
87 case r1 < 0:
88 op |= EmptyBeginText | EmptyBeginLine
89 }
90 switch {
91 case IsWordChar(r2):
92 boundary ^= 1
93 case r2 == '\n':
94 op |= EmptyEndLine
95 case r2 < 0:
96 op |= EmptyEndText | EmptyEndLine
97 }
98 if boundary != 0 {
99 op ^= (EmptyWordBoundary | EmptyNoWordBoundary)
100 }
101 return op
102 }
103
104
105
106
107 func IsWordChar(r rune) bool {
108 return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
109 }
110
111
112 type Inst struct {
113 Op InstOp
114 Out uint32
115 Arg uint32
116 Rune []rune
117 }
118
119 func (p *Prog) String() string {
120 var b strings.Builder
121 dumpProg(&b, p)
122 return b.String()
123 }
124
125
126 func (p *Prog) skipNop(pc uint32) *Inst {
127 i := &p.Inst[pc]
128 for i.Op == InstNop || i.Op == InstCapture {
129 i = &p.Inst[i.Out]
130 }
131 return i
132 }
133
134
135 func (i *Inst) op() InstOp {
136 op := i.Op
137 switch op {
138 case InstRune1, InstRuneAny, InstRuneAnyNotNL:
139 op = InstRune
140 }
141 return op
142 }
143
144
145
146
147 func (p *Prog) Prefix() (prefix string, complete bool) {
148 i := p.skipNop(uint32(p.Start))
149
150
151 if i.op() != InstRune || len(i.Rune) != 1 {
152 return "", i.Op == InstMatch
153 }
154
155
156 var buf strings.Builder
157 for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
158 buf.WriteRune(i.Rune[0])
159 i = p.skipNop(i.Out)
160 }
161 return buf.String(), i.Op == InstMatch
162 }
163
164
165
166 func (p *Prog) StartCond() EmptyOp {
167 var flag EmptyOp
168 pc := uint32(p.Start)
169 i := &p.Inst[pc]
170 Loop:
171 for {
172 switch i.Op {
173 case InstEmptyWidth:
174 flag |= EmptyOp(i.Arg)
175 case InstFail:
176 return ^EmptyOp(0)
177 case InstCapture, InstNop:
178
179 default:
180 break Loop
181 }
182 pc = i.Out
183 i = &p.Inst[pc]
184 }
185 return flag
186 }
187
188 const noMatch = -1
189
190
191
192 func (i *Inst) MatchRune(r rune) bool {
193 return i.MatchRunePos(r) != noMatch
194 }
195
196
197
198
199
200
201 func (i *Inst) MatchRunePos(r rune) int {
202 rune := i.Rune
203
204 switch len(rune) {
205 case 0:
206 return noMatch
207
208 case 1:
209
210 r0 := rune[0]
211 if r == r0 {
212 return 0
213 }
214 if Flags(i.Arg)&FoldCase != 0 {
215 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
216 if r == r1 {
217 return 0
218 }
219 }
220 }
221 return noMatch
222
223 case 2:
224 if r >= rune[0] && r <= rune[1] {
225 return 0
226 }
227 return noMatch
228
229 case 4, 6, 8:
230
231
232 for j := 0; j < len(rune); j += 2 {
233 if r < rune[j] {
234 return noMatch
235 }
236 if r <= rune[j+1] {
237 return j / 2
238 }
239 }
240 return noMatch
241 }
242
243
244 lo := 0
245 hi := len(rune) / 2
246 for lo < hi {
247 m := lo + (hi-lo)/2
248 if c := rune[2*m]; c <= r {
249 if r <= rune[2*m+1] {
250 return m
251 }
252 lo = m + 1
253 } else {
254 hi = m
255 }
256 }
257 return noMatch
258 }
259
260
261
262
263 func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
264 switch EmptyOp(i.Arg) {
265 case EmptyBeginLine:
266 return before == '\n' || before == -1
267 case EmptyEndLine:
268 return after == '\n' || after == -1
269 case EmptyBeginText:
270 return before == -1
271 case EmptyEndText:
272 return after == -1
273 case EmptyWordBoundary:
274 return IsWordChar(before) != IsWordChar(after)
275 case EmptyNoWordBoundary:
276 return IsWordChar(before) == IsWordChar(after)
277 }
278 panic("unknown empty width arg")
279 }
280
281 func (i *Inst) String() string {
282 var b strings.Builder
283 dumpInst(&b, i)
284 return b.String()
285 }
286
287 func bw(b *strings.Builder, args ...string) {
288 for _, s := range args {
289 b.WriteString(s)
290 }
291 }
292
293 func dumpProg(b *strings.Builder, p *Prog) {
294 for j := range p.Inst {
295 i := &p.Inst[j]
296 pc := strconv.Itoa(j)
297 if len(pc) < 3 {
298 b.WriteString(" "[len(pc):])
299 }
300 if j == p.Start {
301 pc += "*"
302 }
303 bw(b, pc, "\t")
304 dumpInst(b, i)
305 bw(b, "\n")
306 }
307 }
308
309 func u32(i uint32) string {
310 return strconv.FormatUint(uint64(i), 10)
311 }
312
313 func dumpInst(b *strings.Builder, i *Inst) {
314 switch i.Op {
315 case InstAlt:
316 bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
317 case InstAltMatch:
318 bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
319 case InstCapture:
320 bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
321 case InstEmptyWidth:
322 bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
323 case InstMatch:
324 bw(b, "match")
325 case InstFail:
326 bw(b, "fail")
327 case InstNop:
328 bw(b, "nop -> ", u32(i.Out))
329 case InstRune:
330 if i.Rune == nil {
331
332 bw(b, "rune <nil>")
333 }
334 bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)))
335 if Flags(i.Arg)&FoldCase != 0 {
336 bw(b, "/i")
337 }
338 bw(b, " -> ", u32(i.Out))
339 case InstRune1:
340 bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
341 case InstRuneAny:
342 bw(b, "any -> ", u32(i.Out))
343 case InstRuneAnyNotNL:
344 bw(b, "anynotnl -> ", u32(i.Out))
345 }
346 }
347
View as plain text