fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package query
16
17import (
18 "log"
19 "regexp/syntax"
20
21 "github.com/sourcegraph/zoekt/internal/syntaxutil"
22)
23
// Blank reference to log.Println so the "log" import always counts as used.
var _ = log.Println
25
// LowerRegexp returns a copy of r in which the ASCII upper-case letters
// 'A'-'Z' stored in literal and character-class nodes are replaced by their
// lower-case counterparts. All other runes, including non-ASCII upper-case
// letters, are left as they are. For character classes the entries of the
// rune slice are lowered one by one.
//
// The tree is rebuilt recursively: every visited node is a fresh copy, so r
// itself is not modified.
func LowerRegexp(r *syntax.Regexp) *syntax.Regexp {
	lowered := *r

	if r.Op == syntax.OpLiteral || r.Op == syntax.OpCharClass {
		// Leaf carrying runes: give the copy its own rune slice and lower
		// it in place.
		runes := make([]rune, len(r.Rune))
		copy(runes, r.Rune)
		for i := range runes {
			if 'A' <= runes[i] && runes[i] <= 'Z' {
				runes[i] += 'a' - 'A'
			}
		}
		lowered.Rune = runes
		return &lowered
	}

	// Any other operator: lower each sub-expression into a new slice.
	subs := make([]*syntax.Regexp, len(r.Sub))
	for i := range subs {
		subs[i] = LowerRegexp(r.Sub[i])
	}
	lowered.Sub = subs
	return &lowered
}
47
48// OptimizeRegexp converts capturing groups to non-capturing groups.
49// Returns original input if an error is encountered
50func OptimizeRegexp(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
51 r := convertCapture(re, flags)
52 return r.Simplify()
53}
54
55func convertCapture(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
56 if !hasCapture(re) {
57 return re
58 }
59
60 // Make a copy so in unlikely event of an error the original can be used as a fallback
61 r, err := syntax.Parse(syntaxutil.RegexpString(re), flags)
62 if err != nil {
63 log.Printf("failed to copy regexp `%s`: %v", re, err)
64 return re
65 }
66
67 r = uncapture(r)
68
69 // Parse again for new structure to take effect
70 r, err = syntax.Parse(syntaxutil.RegexpString(r), flags)
71 if err != nil {
72 log.Printf("failed to parse regexp after uncapture `%s`: %v", r, err)
73 return re
74 }
75
76 return r
77}
78
// hasCapture reports whether r, or any expression nested below it, is a
// capturing group.
func hasCapture(r *syntax.Regexp) bool {
	found := r.Op == syntax.OpCapture
	// Stop descending as soon as one capture has been seen.
	for i := 0; !found && i < len(r.Sub); i++ {
		found = hasCapture(r.Sub[i])
	}
	return found
}
92
// uncapture rewrites r in place so that no node in the tree is a capturing
// group: each capture node becomes a concatenation and its capture index and
// name are cleared. The same pointer r is returned.
func uncapture(r *syntax.Regexp) *syntax.Regexp {
	if r.Op == syntax.OpCapture {
		// A capture wraps exactly one sub-expression, so turning it into a
		// concatenation leaves that sub-expression as the only operand.
		r.Op, r.Cap, r.Name = syntax.OpConcat, 0, ""
	}

	for idx := range r.Sub {
		r.Sub[idx] = uncapture(r.Sub[idx])
	}

	return r
}
106}