fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package query
16
17import (
18 "regexp/syntax"
19 "strings"
20 "testing"
21
22 "github.com/sourcegraph/zoekt/internal/syntaxutil"
23)
24
// opnames maps each regexp/syntax operator to the name of its Go constant,
// e.g. syntax.OpLiteral -> "OpLiteral". printRegexp uses it to label the
// nodes of a parsed regexp when dumping its tree.
var opnames = func() map[syntax.Op]string {
	ops := []syntax.Op{
		syntax.OpNoMatch,
		syntax.OpEmptyMatch,
		syntax.OpLiteral,
		syntax.OpCharClass,
		syntax.OpAnyCharNotNL,
		syntax.OpAnyChar,
		syntax.OpBeginLine,
		syntax.OpEndLine,
		syntax.OpBeginText,
		syntax.OpEndText,
		syntax.OpWordBoundary,
		syntax.OpNoWordBoundary,
		syntax.OpCapture,
		syntax.OpStar,
		syntax.OpPlus,
		syntax.OpQuest,
		syntax.OpRepeat,
		syntax.OpConcat,
		syntax.OpAlternate,
	}

	names := make(map[syntax.Op]string, len(ops))
	for _, op := range ops {
		// Op.String omits the "Op" prefix ("Literal", "Concat", ...), so add
		// it back to reproduce the constant names.
		names[op] = "Op" + op.String()
	}
	return names
}()
46
47func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
48 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
49 for _, s := range r.Sub {
50 printRegexp(t, s, lvl+1)
51 }
52}
53
54func TestLowerRegexp(t *testing.T) {
55 in := "[a-zA-Z]fooBAR"
56 re := mustParseRE(in)
57 in = syntaxutil.RegexpString(re)
58 got := LowerRegexp(re)
59 want := "[a-za-z]foobar"
60 if got.String() != want {
61 printRegexp(t, re, 0)
62 printRegexp(t, got, 0)
63 t.Errorf("got %s, want %s", got, want)
64 }
65
66 if orig := syntaxutil.RegexpString(re); orig != in {
67 t.Errorf("got mutated original %s want %s", orig, in)
68 }
69}
70
71func TestOptimize(t *testing.T) {
72 tests := []struct {
73 name string
74 in string
75 want string
76 }{
77 {name: "simple capture", in: "(hello)world", want: "(?:hello)world"},
78 {name: "simple capture == literal", in: "(hello)world", want: "helloworld"},
79 {name: "capture alternative", in: "test(ing|ed)", want: "test(?:ing|ed)"},
80 {name: "capture repeat", in: "ba(na){1,2}", want: "ba(?:na){1,2}"},
81 {name: "nested captures", in: "b(a(n(a(n(a)))))", want: "banana"},
82 }
83
84 for _, tt := range tests {
85 t.Run(tt.name, func(t *testing.T) {
86 // optimizeRegexp always calls Simplify
87 // calling Simplify here makes test cases more predictable
88 simplifiedWant := mustParseRE(tt.want).Simplify()
89
90 in := mustParseRE(tt.in)
91 got := OptimizeRegexp(in, regexpFlags)
92
93 // String comparison as the same Regexp string can have different ASTs
94 // e.g. optimize of `ba(na){1,2}` == `bana(?:na)?`
95 // however the AST is different from directly parsing `bana(?:na)?`
96 if got.String() != simplifiedWant.String() {
97 printRegexp(t, got, 0)
98 printRegexp(t, simplifiedWant, 0)
99 t.Errorf("got %s, want %s\n", got, tt.want)
100 }
101 })
102 }
103}
104
105func TestRegexpRegexpString(t *testing.T) {
106 tests := []struct {
107 in string
108 want string
109 }{
110 {in: `abc`, want: `abc`},
111 {in: `a.*b`, want: `a(?-s:.)*b`},
112 }
113
114 for _, tt := range tests {
115 t.Run(tt.in, func(t *testing.T) {
116 q := &Regexp{Regexp: mustParseRE(tt.in)}
117 if got := q.RegexpString(); got != tt.want {
118 t.Fatalf("RegexpString(%q) = %q, want %q", tt.in, got, tt.want)
119 }
120 if got := q.String(); got == tt.want {
121 t.Fatalf("String(%q) = raw pattern %q, want query formatting", tt.in, got)
122 }
123 })
124 }
125}