fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strconv"
23 "strings"
24 "testing"
25
26 "github.com/RoaringBitmap/roaring"
27 "github.com/google/go-cmp/cmp"
28 "github.com/google/go-cmp/cmp/cmpopts"
29 "github.com/grafana/regexp"
30 "github.com/sourcegraph/zoekt"
31 "github.com/sourcegraph/zoekt/query"
32)
33
34var opnames = map[syntax.Op]string{
35 syntax.OpNoMatch: "OpNoMatch",
36 syntax.OpEmptyMatch: "OpEmptyMatch",
37 syntax.OpLiteral: "OpLiteral",
38 syntax.OpCharClass: "OpCharClass",
39 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
40 syntax.OpAnyChar: "OpAnyChar",
41 syntax.OpBeginLine: "OpBeginLine",
42 syntax.OpEndLine: "OpEndLine",
43 syntax.OpBeginText: "OpBeginText",
44 syntax.OpEndText: "OpEndText",
45 syntax.OpWordBoundary: "OpWordBoundary",
46 syntax.OpNoWordBoundary: "OpNoWordBoundary",
47 syntax.OpCapture: "OpCapture",
48 syntax.OpStar: "OpStar",
49 syntax.OpPlus: "OpPlus",
50 syntax.OpQuest: "OpQuest",
51 syntax.OpRepeat: "OpRepeat",
52 syntax.OpConcat: "OpConcat",
53 syntax.OpAlternate: "OpAlternate",
54}
55
56func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
57 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
58 for _, s := range r.Sub {
59 printRegexp(t, s, lvl+1)
60 }
61}
62
63func caseSensitiveSubstrMT(pattern string) matchTree {
64 d := &indexData{}
65 mt, _ := d.newSubstringMatchTree(&query.Substring{
66 Pattern: pattern,
67 CaseSensitive: true,
68 })
69 return mt
70}
71
72func substrMT(pattern string) matchTree {
73 d := &indexData{}
74 mt, _ := d.newSubstringMatchTree(&query.Substring{
75 Pattern: pattern,
76 CaseSensitive: false,
77 })
78 return mt
79}
80
81func TestRegexpParse(t *testing.T) {
82 type testcase struct {
83 in string
84 query matchTree
85 isEquivalent bool
86 caseSensitive bool
87 }
88
89 cases := []testcase{
90 {"(foo|)bar", substrMT("bar"), false, false},
91 {"(foo|)", &bruteForceMatchTree{}, false, false},
92 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
93 &orMatchTree{[]matchTree{
94 substrMT("foo"),
95 substrMT("bar"),
96 }},
97 substrMT("baz"),
98 substrMT("bla"),
99 }}, false, false},
100 {
101 "^[a-z](People)+barrabas$",
102 &andMatchTree{[]matchTree{
103 substrMT("People"),
104 substrMT("barrabas"),
105 }}, false, false,
106 },
107 {"foo", substrMT("foo"), true, false},
108 {"foo", caseSensitiveSubstrMT("foo"), true, true},
109 {"(?i)foo", substrMT("FOO"), true, false},
110 {"(?i)foo", substrMT("FOO"), true, true},
111 {"^foo", substrMT("foo"), false, false},
112 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false},
113 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
114 substrMT("thread"),
115 substrMT("needle"),
116 substrMT("haystack"),
117 }}, true, false},
118 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
119 substrMT("foo"),
120 substrMT("bar"),
121 }}}, false, false},
122 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
123 substrMT("foo"),
124 substrMT("bar"),
125 }}, false, false},
126 {"(foo){2,}", substrMT("foo"), false, false},
127 {"(...)(...)", &bruteForceMatchTree{}, false, false},
128 }
129
130 for _, c := range cases {
131 r, err := syntax.Parse(c.in, syntax.Perl)
132 if err != nil {
133 t.Errorf("Parse(%q): %v", c.in, err)
134 continue
135 }
136 d := indexData{}
137 q := query.Regexp{
138 Regexp: r,
139 CaseSensitive: c.caseSensitive,
140 }
141 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
142 if !reflect.DeepEqual(c.query, gotQuery) {
143 printRegexp(t, r, 0)
144 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
145 }
146 if isEq != c.isEquivalent {
147 printRegexp(t, r, 0)
148 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
149 }
150 }
151}
152
153func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
154 cs := compoundReposShard(t, "foo", "bar")
155
156 ctx := context.Background()
157 q := &query.Const{Value: true}
158 opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1}
159
160 sr, err := cs.Search(ctx, q, opts)
161 if err != nil {
162 t.Fatal(err)
163 }
164
165 t.Run("matches", func(t *testing.T) {
166 var filenames []string
167 for _, r := range sr.Files {
168 filenames = append(filenames, r.FileName)
169 }
170
171 got, want := filenames, []string{"foo.txt", "bar.txt"}
172 if diff := cmp.Diff(want, got); diff != "" {
173 t.Errorf("mismatch (-want, +got): %s", diff)
174 }
175 })
176
177 t.Run("stats", func(t *testing.T) {
178 got, want := sr.Stats, zoekt.Stats{
179 ContentBytesLoaded: 0,
180 FileCount: 2,
181 FilesConsidered: 2,
182 FilesSkipped: 2,
183 ShardsScanned: 1,
184 MatchCount: 2,
185 }
186 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
187 t.Errorf("mismatch (-want, +got): %s", diff)
188 }
189 })
190}
191
192func compoundReposShard(t *testing.T, names ...string) *indexData {
193 t.Helper()
194
195 repos := make([]*zoekt.Repository, 0, len(names))
196 docs := make([][]Document, 0, len(names))
197 for _, name := range names {
198 repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name})
199 ds := []Document{
200 {Name: name + ".txt", Content: []byte(name + " content")},
201 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
202 }
203 docs = append(docs, ds)
204 }
205
206 b := testShardBuilderCompound(t, repos, docs)
207 s := searcherForTest(t, b)
208 return s.(*indexData)
209}
210
211func TestSimplifyRepoSet(t *testing.T) {
212 d := compoundReposShard(t, "foo", "bar")
213 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
214 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
215 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
216
217 got := d.simplify(all)
218 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
219 t.Fatalf("-want, +got:\n%s", d)
220 }
221
222 got = d.simplify(some)
223 if d := cmp.Diff(some, got); d != "" {
224 t.Fatalf("-want, +got:\n%s", d)
225 }
226
227 got = d.simplify(none)
228 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
229 t.Fatalf("-want, +got:\n%s", d)
230 }
231}
232
233func TestSimplifyRepoIDs(t *testing.T) {
234 d := compoundReposShard(t, "foo", "bar")
235 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
236 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
237 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
238
239 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
240
241 got := d.simplify(all)
242 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
243 t.Fatalf("-want, +got:\n%s", d)
244 }
245
246 got = d.simplify(some)
247 if d := cmp.Diff(some, got, tr); d != "" {
248 t.Fatalf("-want, +got:\n%s", d)
249 }
250
251 got = d.simplify(none)
252 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
253 t.Fatalf("-want, +got:\n%s", d)
254 }
255}
256
257func TestSimplifyRepo(t *testing.T) {
258 re := func(pat string) *query.Repo {
259 t.Helper()
260 re, err := regexp.Compile(pat)
261 if err != nil {
262 t.Fatal(err)
263 }
264 return &query.Repo{
265 Regexp: re,
266 }
267 }
268 d := compoundReposShard(t, "foo", "fool")
269 cases := []struct {
270 name string
271 q query.Q
272 want query.Q
273 }{{
274 name: "all",
275 q: re("f.*"),
276 want: &query.Const{Value: true},
277 }, {
278 name: "some",
279 q: re("foo."),
280 want: re("foo."),
281 }, {
282 name: "none",
283 q: re("banana"),
284 want: &query.Const{Value: false},
285 }}
286
287 for _, tc := range cases {
288 got := d.simplify(tc.q)
289 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
290 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
291 }
292 }
293}
294
295func TestSimplifyRepoRegexp(t *testing.T) {
296 re := func(pat string) *query.RepoRegexp {
297 t.Helper()
298 re, err := regexp.Compile(pat)
299 if err != nil {
300 t.Fatal(err)
301 }
302 return &query.RepoRegexp{
303 Regexp: re,
304 }
305 }
306 d := compoundReposShard(t, "foo", "fool")
307 cases := []struct {
308 name string
309 q query.Q
310 want query.Q
311 }{{
312 name: "all",
313 q: re("f.*"),
314 want: &query.Const{Value: true},
315 }, {
316 name: "some",
317 q: re("foo."),
318 want: re("foo."),
319 }, {
320 name: "none",
321 q: re("banana"),
322 want: &query.Const{Value: false},
323 }}
324
325 for _, tc := range cases {
326 got := d.simplify(tc.q)
327 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
328 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
329 }
330 }
331}
332
333func TestSimplifyRcRawConfig(t *testing.T) {
334 d := compoundReposShard(t, "foo", "bar")
335 var all = query.RcOnlyPrivate | query.RcNoForks | query.RcNoArchived
336
337 got := d.simplify(all)
338 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
339 t.Fatalf("-want, +got:\n%s", d)
340 }
341
342 var none = query.RcOnlyPublic | query.RcNoForks | query.RcNoArchived
343 got = d.simplify(none)
344 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
345 t.Fatalf("-want, +got:\n%s", d)
346 }
347}
348
349func TestSimplifyBranchesRepos(t *testing.T) {
350 d := compoundReposShard(t, "foo", "bar")
351
352 some := &query.BranchesRepos{
353 List: []query.BranchRepos{
354 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
355 },
356 }
357 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
358
359 got := d.simplify(some)
360 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
361 if d := cmp.Diff(some, got, tr); d != "" {
362 t.Fatalf("-want, +got:\n%s", d)
363 }
364
365 got = d.simplify(none)
366 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
367 t.Fatalf("-want, +got:\n%s", d)
368 }
369}
370
371func hash(name string) uint32 {
372 h := fnv.New32()
373 h.Write([]byte(name))
374 return h.Sum32()
375}
376
377func TestGatherBranches(t *testing.T) {
378 content := []byte("dummy")
379 b := testShardBuilder(t, &zoekt.Repository{
380 Branches: []zoekt.RepositoryBranch{
381 {"foo", "v1"},
382 {"foo-2", "v1"},
383 {"main", "v1"},
384 {"bar", "v1"},
385 {"quz", "v1"},
386 },
387 },
388 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}},
389 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}},
390 Document{Name: "f3", Content: content, Branches: []string{"main"}})
391
392 d := searcherForTest(t, b).(*indexData)
393
394 sr, err := d.Search(
395 context.Background(),
396 &query.Or{Children: []query.Q{
397 &query.Branch{Pattern: "foo"},
398 &query.Branch{Pattern: "quz"},
399 }},
400 &zoekt.SearchOptions{},
401 )
402 if err != nil {
403 t.Fatal(err)
404 }
405
406 want := map[string][]string{
407 "f1": {"foo", "quz"},
408 "f2": {"foo", "foo-2"},
409 }
410
411 if len(sr.Files) != 2 {
412 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files))
413 }
414
415 for _, f := range sr.Files {
416 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
417 t.Fatalf("-want,+got:\n%s", d)
418 }
419 }
420}
421
422func TestGatherBranchesMany(t *testing.T) {
423 content := []byte("dummy")
424 manyBranchNames := []string{}
425 manyBranches := []zoekt.RepositoryBranch{}
426 for i := range 64 {
427 branchName := "branch-" + strconv.Itoa(i)
428 manyBranchNames = append(manyBranchNames, branchName)
429 manyBranches = append(manyBranches, zoekt.RepositoryBranch{
430 Name: branchName,
431 Version: "v1"})
432 }
433 b := testShardBuilder(t, &zoekt.Repository{
434 Branches: manyBranches,
435 }, Document{Name: "f1", Content: content, Branches: manyBranchNames})
436
437 d := searcherForTest(t, b).(*indexData)
438
439 sr, err := d.Search(
440 context.Background(),
441 &query.Substring{
442 Pattern: "dummy",
443 CaseSensitive: false,
444 },
445 &zoekt.SearchOptions{},
446 )
447 if err != nil {
448 t.Fatal(err)
449 }
450
451 want := map[string][]string{
452 "f1": manyBranchNames,
453 }
454
455 if len(sr.Files) != 1 {
456 t.Fatalf("len(sr.Files): want %d, got %d", 1, len(sr.Files))
457 }
458
459 for _, f := range sr.Files {
460 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
461 t.Fatalf("-want,+got:\n%s", d)
462 }
463 }
464}