fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strings"
23 "testing"
24
25 "github.com/RoaringBitmap/roaring"
26 "github.com/google/go-cmp/cmp"
27 "github.com/grafana/regexp"
28 "github.com/sourcegraph/zoekt/query"
29)
30
31var opnames = map[syntax.Op]string{
32 syntax.OpNoMatch: "OpNoMatch",
33 syntax.OpEmptyMatch: "OpEmptyMatch",
34 syntax.OpLiteral: "OpLiteral",
35 syntax.OpCharClass: "OpCharClass",
36 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
37 syntax.OpAnyChar: "OpAnyChar",
38 syntax.OpBeginLine: "OpBeginLine",
39 syntax.OpEndLine: "OpEndLine",
40 syntax.OpBeginText: "OpBeginText",
41 syntax.OpEndText: "OpEndText",
42 syntax.OpWordBoundary: "OpWordBoundary",
43 syntax.OpNoWordBoundary: "OpNoWordBoundary",
44 syntax.OpCapture: "OpCapture",
45 syntax.OpStar: "OpStar",
46 syntax.OpPlus: "OpPlus",
47 syntax.OpQuest: "OpQuest",
48 syntax.OpRepeat: "OpRepeat",
49 syntax.OpConcat: "OpConcat",
50 syntax.OpAlternate: "OpAlternate",
51}
52
53func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
54 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
55 for _, s := range r.Sub {
56 printRegexp(t, s, lvl+1)
57 }
58}
59
60func substrMT(pattern string) matchTree {
61 d := &indexData{}
62 mt, _ := d.newSubstringMatchTree(&query.Substring{
63 Pattern: pattern,
64 })
65 return mt
66}
67
68func TestRegexpParse(t *testing.T) {
69 type testcase struct {
70 in string
71 query matchTree
72 isEquivalent bool
73 }
74
75 cases := []testcase{
76 {"(foo|)bar", substrMT("bar"), false},
77 {"(foo|)", &bruteForceMatchTree{}, false},
78 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
79 &orMatchTree{[]matchTree{
80 substrMT("foo"),
81 substrMT("bar"),
82 }},
83 substrMT("baz"),
84 substrMT("bla"),
85 }}, false},
86 {
87 "^[a-z](People)+barrabas$",
88 &andMatchTree{[]matchTree{
89 substrMT("People"),
90 substrMT("barrabas"),
91 }}, false,
92 },
93 {"foo", substrMT("foo"), true},
94 {"^foo", substrMT("foo"), false},
95 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false},
96 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
97 substrMT("thread"),
98 substrMT("needle"),
99 substrMT("haystack"),
100 }}, true},
101 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
102 substrMT("foo"),
103 substrMT("bar"),
104 }}}, false},
105 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
106 substrMT("foo"),
107 substrMT("bar"),
108 }}, false},
109 {"(foo){2,}", substrMT("foo"), false},
110 {"(...)(...)", &bruteForceMatchTree{}, false},
111 }
112
113 for _, c := range cases {
114 r, err := syntax.Parse(c.in, syntax.Perl)
115 if err != nil {
116 t.Errorf("Parse(%q): %v", c.in, err)
117 continue
118 }
119 d := indexData{}
120 q := query.Regexp{
121 Regexp: r,
122 }
123 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
124 if !reflect.DeepEqual(c.query, gotQuery) {
125 printRegexp(t, r, 0)
126 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
127 }
128 if isEq != c.isEquivalent {
129 printRegexp(t, r, 0)
130 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
131 }
132 }
133}
134
135func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
136 cs := compoundReposShard(t, "foo", "bar")
137
138 ctx := context.Background()
139 q := &query.Const{Value: true}
140 opts := &SearchOptions{ShardRepoMaxMatchCount: 1}
141
142 sr, err := cs.Search(ctx, q, opts)
143 if err != nil {
144 t.Fatal(err)
145 }
146
147 t.Run("matches", func(t *testing.T) {
148 var filenames []string
149 for _, r := range sr.Files {
150 filenames = append(filenames, r.FileName)
151 }
152
153 got, want := filenames, []string{"foo.txt", "bar.txt"}
154 if diff := cmp.Diff(want, got); diff != "" {
155 t.Errorf("mismatch (-want, +got): %s", diff)
156 }
157 })
158
159 t.Run("stats", func(t *testing.T) {
160 got, want := sr.Stats, Stats{
161 ContentBytesLoaded: 2,
162 FileCount: 2,
163 FilesConsidered: 2,
164 FilesSkipped: 2,
165 ShardsScanned: 1,
166 MatchCount: 2,
167 }
168 if diff := cmp.Diff(want, got); diff != "" {
169 t.Errorf("mismatch (-want, +got): %s", diff)
170 }
171 })
172}
173
174func compoundReposShard(t *testing.T, names ...string) *indexData {
175 t.Helper()
176
177 repos := make([]*Repository, 0, len(names))
178 docs := make([][]Document, 0, len(names))
179 for _, name := range names {
180 repos = append(repos, &Repository{ID: hash(name), Name: name})
181 ds := []Document{
182 {Name: name + ".txt", Content: []byte(name + " content")},
183 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
184 }
185 docs = append(docs, ds)
186 }
187
188 b := testIndexBuilderCompound(t, repos, docs)
189 s := searcherForTest(t, b)
190 return s.(*indexData)
191}
192
193func TestSimplifyRepoSet(t *testing.T) {
194 d := compoundReposShard(t, "foo", "bar")
195 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
196 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
197 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
198
199 got := d.simplify(all)
200 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
201 t.Fatalf("-want, +got:\n%s", d)
202 }
203
204 got = d.simplify(some)
205 if d := cmp.Diff(some, got); d != "" {
206 t.Fatalf("-want, +got:\n%s", d)
207 }
208
209 got = d.simplify(none)
210 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
211 t.Fatalf("-want, +got:\n%s", d)
212 }
213}
214
215func TestSimplifyRepoIDs(t *testing.T) {
216 d := compoundReposShard(t, "foo", "bar")
217 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
218 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
219 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
220
221 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
222
223 got := d.simplify(all)
224 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
225 t.Fatalf("-want, +got:\n%s", d)
226 }
227
228 got = d.simplify(some)
229 if d := cmp.Diff(some, got, tr); d != "" {
230 t.Fatalf("-want, +got:\n%s", d)
231 }
232
233 got = d.simplify(none)
234 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
235 t.Fatalf("-want, +got:\n%s", d)
236 }
237}
238
239func TestSimplifyRepo(t *testing.T) {
240 re := func(pat string) *query.Repo {
241 t.Helper()
242 re, err := regexp.Compile(pat)
243 if err != nil {
244 t.Fatal(err)
245 }
246 return &query.Repo{
247 Regexp: re,
248 }
249 }
250 d := compoundReposShard(t, "foo", "fool")
251 cases := []struct {
252 name string
253 q query.Q
254 want query.Q
255 }{{
256 name: "all",
257 q: re("f.*"),
258 want: &query.Const{Value: true},
259 }, {
260 name: "some",
261 q: re("foo."),
262 want: re("foo."),
263 }, {
264 name: "none",
265 q: re("banana"),
266 want: &query.Const{Value: false},
267 }}
268
269 for _, tc := range cases {
270 got := d.simplify(tc.q)
271 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
272 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
273 }
274 }
275}
276
277func TestSimplifyRepoRegexp(t *testing.T) {
278 re := func(pat string) *query.RepoRegexp {
279 t.Helper()
280 re, err := regexp.Compile(pat)
281 if err != nil {
282 t.Fatal(err)
283 }
284 return &query.RepoRegexp{
285 Regexp: re,
286 }
287 }
288 d := compoundReposShard(t, "foo", "fool")
289 cases := []struct {
290 name string
291 q query.Q
292 want query.Q
293 }{{
294 name: "all",
295 q: re("f.*"),
296 want: &query.Const{Value: true},
297 }, {
298 name: "some",
299 q: re("foo."),
300 want: re("foo."),
301 }, {
302 name: "none",
303 q: re("banana"),
304 want: &query.Const{Value: false},
305 }}
306
307 for _, tc := range cases {
308 got := d.simplify(tc.q)
309 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
310 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
311 }
312 }
313}
314
315func TestSimplifyBranchesRepos(t *testing.T) {
316 d := compoundReposShard(t, "foo", "bar")
317
318 some := &query.BranchesRepos{
319 List: []query.BranchRepos{
320 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
321 },
322 }
323 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
324
325 got := d.simplify(some)
326 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
327 if d := cmp.Diff(some, got, tr); d != "" {
328 t.Fatalf("-want, +got:\n%s", d)
329 }
330
331 got = d.simplify(none)
332 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
333 t.Fatalf("-want, +got:\n%s", d)
334 }
335}
336
337func hash(name string) uint32 {
338 h := fnv.New32()
339 h.Write([]byte(name))
340 return h.Sum32()
341}