fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package query
16
17import (
18 "regexp/syntax"
19 "strings"
20 "testing"
21)
22
// opnames maps each regexp/syntax operator to the name of its constant,
// so that printRegexp can label the nodes of a parsed regexp tree.
var opnames = map[syntax.Op]string{
	// Matchers that consume input (or never match).
	syntax.OpNoMatch:      "OpNoMatch",
	syntax.OpLiteral:      "OpLiteral",
	syntax.OpCharClass:    "OpCharClass",
	syntax.OpAnyChar:      "OpAnyChar",
	syntax.OpAnyCharNotNL: "OpAnyCharNotNL",

	// Zero-width assertions.
	syntax.OpEmptyMatch:     "OpEmptyMatch",
	syntax.OpBeginText:      "OpBeginText",
	syntax.OpEndText:        "OpEndText",
	syntax.OpBeginLine:      "OpBeginLine",
	syntax.OpEndLine:        "OpEndLine",
	syntax.OpWordBoundary:   "OpWordBoundary",
	syntax.OpNoWordBoundary: "OpNoWordBoundary",

	// Operators that wrap or combine sub-expressions.
	syntax.OpCapture:   "OpCapture",
	syntax.OpQuest:     "OpQuest",
	syntax.OpStar:      "OpStar",
	syntax.OpPlus:      "OpPlus",
	syntax.OpRepeat:    "OpRepeat",
	syntax.OpConcat:    "OpConcat",
	syntax.OpAlternate: "OpAlternate",
}
44
45func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
46 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
47 for _, s := range r.Sub {
48 printRegexp(t, s, lvl+1)
49 }
50}
51
52func TestLowerRegexp(t *testing.T) {
53 in := "[a-zA-Z]fooBAR"
54 re := mustParseRE(in)
55 in = re.String()
56 got := LowerRegexp(re)
57 want := "[a-za-z]foobar"
58 if got.String() != want {
59 printRegexp(t, re, 0)
60 printRegexp(t, got, 0)
61 t.Errorf("got %s, want %s", got, want)
62 }
63
64 if re.String() != in {
65 t.Errorf("got mutated original %s want %s", re.String(), in)
66 }
67}
68
69func TestOptimize(t *testing.T) {
70 tests := []struct {
71 name string
72 in string
73 want string
74 }{
75 {name: "simple capture", in: "(hello)world", want: "(?:hello)world"},
76 {name: "simple capture == literal", in: "(hello)world", want: "helloworld"},
77 {name: "capture alternative", in: "test(ing|ed)", want: "test(?:ing|ed)"},
78 {name: "capture repeat", in: "ba(na){1,2}", want: "ba(?:na){1,2}"},
79 {name: "nested captures", in: "b(a(n(a(n(a)))))", want: "banana"},
80 }
81
82 for _, tt := range tests {
83 t.Run(tt.name, func(t *testing.T) {
84 // optimizeRegexp always calls Simplify
85 // calling Simplify here makes test cases more predictable
86 simplifiedWant := mustParseRE(tt.want).Simplify()
87
88 in := mustParseRE(tt.in)
89 got := OptimizeRegexp(in, regexpFlags)
90
91 // String comparison as the same Regexp string can have different ASTs
92 // e.g. optimize of `ba(na){1,2}` == `bana(?:na)?`
93 // however the AST is different from directly parsing `bana(?:na)?`
94 if got.String() != simplifiedWant.String() {
95 printRegexp(t, got, 0)
96 printRegexp(t, simplifiedWant, 0)
97 t.Errorf("got %s, want %s\n", got, tt.want)
98 }
99 })
100 }
101}