fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package query
16
17import (
18 "log"
19 "regexp/syntax"
20
21 "slices"
22
23 "github.com/sourcegraph/zoekt/internal/syntaxutil"
24)
25
// Reference the log package through the blank identifier so that its import
// counts as used regardless of what the rest of the file does with it.
var _ = log.Println
27
// LowerRegexp returns a copy of r in which every ASCII upper-case rune stored
// in a literal or character-class node has been replaced by its lower-case
// counterpart. Runes outside 'A'..'Z' are copied as they are. Nodes of any
// other kind are copied with each sub-expression lowered recursively. r and
// its descendants are not modified.
func LowerRegexp(r *syntax.Regexp) *syntax.Regexp {
	lowered := *r

	if r.Op == syntax.OpLiteral || r.Op == syntax.OpCharClass {
		// Give the copy its own rune storage so the input stays untouched.
		runes := make([]rune, len(r.Rune))
		copy(runes, r.Rune)
		for i := range runes {
			if 'A' <= runes[i] && runes[i] <= 'Z' {
				runes[i] += 'a' - 'A'
			}
		}
		lowered.Rune = runes
		return &lowered
	}

	children := make([]*syntax.Regexp, len(r.Sub))
	for i := range r.Sub {
		children[i] = LowerRegexp(r.Sub[i])
	}
	lowered.Sub = children
	return &lowered
}
49
50// OptimizeRegexp converts capturing groups to non-capturing groups.
51// Returns original input if an error is encountered
52func OptimizeRegexp(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
53 r := convertCapture(re, flags)
54 return r.Simplify()
55}
56
57func convertCapture(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
58 if !hasCapture(re) {
59 return re
60 }
61
62 // Make a copy so in unlikely event of an error the original can be used as a fallback
63 r, err := syntax.Parse(syntaxutil.RegexpString(re), flags)
64 if err != nil {
65 log.Printf("failed to copy regexp `%s`: %v", re, err)
66 return re
67 }
68
69 r = uncapture(r)
70
71 // Parse again for new structure to take effect
72 r, err = syntax.Parse(syntaxutil.RegexpString(r), flags)
73 if err != nil {
74 log.Printf("failed to parse regexp after uncapture `%s`: %v", r, err)
75 return re
76 }
77
78 return r
79}
80
// hasCapture reports whether r, or any expression reachable through its Sub
// slices, is a capturing group.
func hasCapture(r *syntax.Regexp) bool {
	isCapture := r.Op == syntax.OpCapture
	return isCapture || slices.IndexFunc(r.Sub, hasCapture) >= 0
}
88
// uncapture rewrites r in place so that it and every expression reachable
// through its Sub slices stop being capturing groups: each capture node
// becomes a concatenation with its capture index and name cleared. The same
// pointer that was passed in is returned.
func uncapture(r *syntax.Regexp) *syntax.Regexp {
	for i := range r.Sub {
		r.Sub[i] = uncapture(r.Sub[i])
	}

	if r.Op != syntax.OpCapture {
		return r
	}

	// A capture wraps exactly one sub-expression, so it can stand as a
	// concatenation of that single element.
	r.Op, r.Cap, r.Name = syntax.OpConcat, 0, ""
	return r
}