fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package query
16
17import (
18 "log"
19 "regexp/syntax"
20)
21
// Blank reference to log.Println: it keeps the "log" import referenced
// regardless of whether any other code in this file calls into the package.
var _ = log.Println
23
// LowerRegexp returns a copy of the expression tree rooted at r in which
// every ASCII uppercase letter ('A'-'Z') stored in the Rune slice of an
// OpLiteral or OpCharClass node has been replaced by its lowercase
// counterpart. Runes outside that range, including non-ASCII letters, are
// copied unchanged.
//
// Nodes of any other kind are copied and their subexpressions are lowered
// recursively. The input tree is not modified: rewritten nodes receive
// freshly allocated Rune or Sub slices.
//
// NOTE(review): for OpCharClass the Rune slice holds range endpoints, and
// each endpoint is lowered on its own. A range that only partly overlaps
// 'A'-'Z' (for example [A-z]) therefore does not map to the exact set of
// lowercased members; confirm that callers tolerate this.
func LowerRegexp(r *syntax.Regexp) *syntax.Regexp {
	lowered := new(syntax.Regexp)
	*lowered = *r

	if r.Op == syntax.OpLiteral || r.Op == syntax.OpCharClass {
		// Copy first, then fold in place so r.Rune is never written to.
		runes := make([]rune, len(r.Rune))
		copy(runes, r.Rune)
		for idx := range runes {
			if 'A' <= runes[idx] && runes[idx] <= 'Z' {
				runes[idx] += 'a' - 'A'
			}
		}
		lowered.Rune = runes
		return lowered
	}

	// Every other node kind: rebuild the child list from lowered children.
	subs := make([]*syntax.Regexp, len(r.Sub))
	for idx := range r.Sub {
		subs[idx] = LowerRegexp(r.Sub[idx])
	}
	lowered.Sub = subs
	return lowered
}
45
46// OptimizeRegexp converts capturing groups to non-capturing groups.
47// Returns original input if an error is encountered
48func OptimizeRegexp(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
49 r := convertCapture(re, flags)
50 return r.Simplify()
51}
52
53func convertCapture(re *syntax.Regexp, flags syntax.Flags) *syntax.Regexp {
54 if !hasCapture(re) {
55 return re
56 }
57
58 // Make a copy so in unlikely event of an error the original can be used as a fallback
59 r, err := syntax.Parse(re.String(), flags)
60 if err != nil {
61 log.Printf("failed to copy regexp `%s`: %v", re, err)
62 return re
63 }
64
65 r = uncapture(r)
66
67 // Parse again for new structure to take effect
68 r, err = syntax.Parse(r.String(), flags)
69 if err != nil {
70 log.Printf("failed to parse regexp after uncapture `%s`: %v", r, err)
71 return re
72 }
73
74 return r
75}
76
// hasCapture reports whether r itself, or any expression nested below it,
// is a capturing group (syntax.OpCapture). The walk is depth-first and
// stops at the first capture found.
func hasCapture(r *syntax.Regexp) bool {
	found := r.Op == syntax.OpCapture
	for idx := 0; !found && idx < len(r.Sub); idx++ {
		found = hasCapture(r.Sub[idx])
	}
	return found
}
90
// uncapture strips capture semantics from the tree rooted at r, modifying
// it in place, and returns r. Each syntax.OpCapture node becomes a
// syntax.OpConcat node with its capture index and name cleared; all
// subexpressions are processed recursively.
func uncapture(r *syntax.Regexp) *syntax.Regexp {
	switch r.Op {
	case syntax.OpCapture:
		// A capture wraps exactly one subexpression, so it can be
		// relabelled as a concatenation of that single child.
		r.Op, r.Cap, r.Name = syntax.OpConcat, 0, ""
	}

	for idx := range r.Sub {
		r.Sub[idx] = uncapture(r.Sub[idx])
	}

	return r
}