fork of https://github.com/sourcegraph/zoekt
1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntaxutil
6
7import (
8 "regexp/syntax"
9 "strconv"
10 "strings"
11 "unicode"
12)
13
14// Note to implementers:
15// In this package, re is always a *Regexp and r is always a rune.
16
17// writeRegexp writes the Perl syntax for the regular expression re to b.
18func writeRegexp(b *strings.Builder, re *syntax.Regexp) {
19 switch re.Op {
20 default:
21 b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
22 case syntax.OpNoMatch:
23 b.WriteString(`[^\x00-\x{10FFFF}]`)
24 case syntax.OpEmptyMatch:
25 b.WriteString(`(?:)`)
26 case syntax.OpLiteral:
27 if re.Flags&syntax.FoldCase != 0 {
28 b.WriteString(`(?i:`)
29 }
30 for _, r := range re.Rune {
31 escape(b, r, false)
32 }
33 if re.Flags&syntax.FoldCase != 0 {
34 b.WriteString(`)`)
35 }
36 case syntax.OpCharClass:
37 if len(re.Rune)%2 != 0 {
38 b.WriteString(`[invalid char class]`)
39 break
40 }
41 b.WriteRune('[')
42 if len(re.Rune) == 0 {
43 b.WriteString(`^\x00-\x{10FFFF}`)
44 } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 {
45 // Contains 0 and MaxRune. Probably a negated class.
46 // Print the gaps.
47 b.WriteRune('^')
48 for i := 1; i < len(re.Rune)-1; i += 2 {
49 lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
50 escape(b, lo, lo == '-')
51 if lo != hi {
52 b.WriteRune('-')
53 escape(b, hi, hi == '-')
54 }
55 }
56 } else {
57 for i := 0; i < len(re.Rune); i += 2 {
58 lo, hi := re.Rune[i], re.Rune[i+1]
59 escape(b, lo, lo == '-')
60 if lo != hi {
61 b.WriteRune('-')
62 escape(b, hi, hi == '-')
63 }
64 }
65 }
66 b.WriteRune(']')
67 case syntax.OpAnyCharNotNL:
68 b.WriteString(`(?-s:.)`)
69 case syntax.OpAnyChar:
70 b.WriteString(`(?s:.)`)
71 case syntax.OpBeginLine:
72 b.WriteString(`(?m:^)`)
73 case syntax.OpEndLine:
74 b.WriteString(`(?m:$)`)
75 case syntax.OpBeginText:
76 b.WriteString(`\A`)
77 case syntax.OpEndText:
78 if re.Flags&syntax.WasDollar != 0 {
79 b.WriteString(`(?-m:$)`)
80 } else {
81 b.WriteString(`\z`)
82 }
83 case syntax.OpWordBoundary:
84 b.WriteString(`\b`)
85 case syntax.OpNoWordBoundary:
86 b.WriteString(`\B`)
87 case syntax.OpCapture:
88 if re.Name != "" {
89 b.WriteString(`(?P<`)
90 b.WriteString(re.Name)
91 b.WriteRune('>')
92 } else {
93 b.WriteRune('(')
94 }
95 if re.Sub[0].Op != syntax.OpEmptyMatch {
96 writeRegexp(b, re.Sub[0])
97 }
98 b.WriteRune(')')
99 case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
100 if sub := re.Sub[0]; sub.Op > syntax.OpCapture || sub.Op == syntax.OpLiteral && len(sub.Rune) > 1 {
101 b.WriteString(`(?:`)
102 writeRegexp(b, sub)
103 b.WriteString(`)`)
104 } else {
105 writeRegexp(b, sub)
106 }
107 switch re.Op {
108 case syntax.OpStar:
109 b.WriteRune('*')
110 case syntax.OpPlus:
111 b.WriteRune('+')
112 case syntax.OpQuest:
113 b.WriteRune('?')
114 case syntax.OpRepeat:
115 b.WriteRune('{')
116 b.WriteString(strconv.Itoa(re.Min))
117 if re.Max != re.Min {
118 b.WriteRune(',')
119 if re.Max >= 0 {
120 b.WriteString(strconv.Itoa(re.Max))
121 }
122 }
123 b.WriteRune('}')
124 }
125 if re.Flags&syntax.NonGreedy != 0 {
126 b.WriteRune('?')
127 }
128 case syntax.OpConcat:
129 for _, sub := range re.Sub {
130 if sub.Op == syntax.OpAlternate {
131 b.WriteString(`(?:`)
132 writeRegexp(b, sub)
133 b.WriteString(`)`)
134 } else {
135 writeRegexp(b, sub)
136 }
137 }
138 case syntax.OpAlternate:
139 for i, sub := range re.Sub {
140 if i > 0 {
141 b.WriteRune('|')
142 }
143 writeRegexp(b, sub)
144 }
145 }
146}
147
148func RegexpString(re *syntax.Regexp) string {
149 var b strings.Builder
150 writeRegexp(&b, re)
151 return b.String()
152}
153
// meta holds the printable characters that escape always prefixes with a
// backslash.
const meta = `\.+*?()|[]{}^$`

// escape appends r to b in a form suitable for a regular expression.
//
// A printable rune is written as itself, preceded by a backslash when it
// appears in meta or when force is set. A non-printable rune is written as one
// of the short escapes \a \f \n \r \t \v where one applies, otherwise as a
// two-digit \xNN for values below 0x100, otherwise as \x{N...} in lowercase
// hex.
func escape(b *strings.Builder, r rune, force bool) {
	if unicode.IsPrint(r) {
		if force || strings.ContainsRune(meta, r) {
			b.WriteByte('\\')
		}
		b.WriteRune(r)
		return
	}

	// Control characters that have a dedicated backslash spelling.
	var short string
	switch r {
	case '\a':
		short = `\a`
	case '\f':
		short = `\f`
	case '\n':
		short = `\n`
	case '\r':
		short = `\r`
	case '\t':
		short = `\t`
	case '\v':
		short = `\v`
	}
	if short != "" {
		b.WriteString(short)
		return
	}

	digits := strconv.FormatInt(int64(r), 16)
	if r >= 0x100 {
		b.WriteString(`\x{`)
		b.WriteString(digits)
		b.WriteString(`}`)
		return
	}

	// Pad to two hex digits for the brace-less form.
	b.WriteString(`\x`)
	if len(digits) == 1 {
		b.WriteByte('0')
	}
	b.WriteString(digits)
}