fork of https://github.com/sourcegraph/zoekt
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "context"
19 "hash/fnv"
20 "reflect"
21 "regexp/syntax"
22 "strconv"
23 "strings"
24 "testing"
25
26 "github.com/RoaringBitmap/roaring"
27 "github.com/google/go-cmp/cmp"
28 "github.com/google/go-cmp/cmp/cmpopts"
29 "github.com/grafana/regexp"
30
31 "github.com/sourcegraph/zoekt"
32 "github.com/sourcegraph/zoekt/query"
33)
34
35var opnames = map[syntax.Op]string{
36 syntax.OpNoMatch: "OpNoMatch",
37 syntax.OpEmptyMatch: "OpEmptyMatch",
38 syntax.OpLiteral: "OpLiteral",
39 syntax.OpCharClass: "OpCharClass",
40 syntax.OpAnyCharNotNL: "OpAnyCharNotNL",
41 syntax.OpAnyChar: "OpAnyChar",
42 syntax.OpBeginLine: "OpBeginLine",
43 syntax.OpEndLine: "OpEndLine",
44 syntax.OpBeginText: "OpBeginText",
45 syntax.OpEndText: "OpEndText",
46 syntax.OpWordBoundary: "OpWordBoundary",
47 syntax.OpNoWordBoundary: "OpNoWordBoundary",
48 syntax.OpCapture: "OpCapture",
49 syntax.OpStar: "OpStar",
50 syntax.OpPlus: "OpPlus",
51 syntax.OpQuest: "OpQuest",
52 syntax.OpRepeat: "OpRepeat",
53 syntax.OpConcat: "OpConcat",
54 syntax.OpAlternate: "OpAlternate",
55}
56
57func printRegexp(t *testing.T, r *syntax.Regexp, lvl int) {
58 t.Logf("%s%s ch: %d", strings.Repeat(" ", lvl), opnames[r.Op], len(r.Sub))
59 for _, s := range r.Sub {
60 printRegexp(t, s, lvl+1)
61 }
62}
63
64func caseSensitiveSubstrMT(pattern string) matchTree {
65 d := &indexData{}
66 mt, _ := d.newSubstringMatchTree(&query.Substring{
67 Pattern: pattern,
68 CaseSensitive: true,
69 })
70 return mt
71}
72
73func substrMT(pattern string) matchTree {
74 d := &indexData{}
75 mt, _ := d.newSubstringMatchTree(&query.Substring{
76 Pattern: pattern,
77 CaseSensitive: false,
78 })
79 return mt
80}
81
82func TestRegexpParse(t *testing.T) {
83 type testcase struct {
84 in string
85 query matchTree
86 isEquivalent bool
87 caseSensitive bool
88 }
89
90 cases := []testcase{
91 {"(foo|)bar", substrMT("bar"), false, false},
92 {"(foo|)", &bruteForceMatchTree{}, false, false},
93 {"(foo|bar)baz.*bla", &andMatchTree{[]matchTree{
94 &orMatchTree{[]matchTree{
95 substrMT("foo"),
96 substrMT("bar"),
97 }},
98 substrMT("baz"),
99 substrMT("bla"),
100 }}, false, false},
101 {
102 "^[a-z](People)+barrabas$",
103 &andMatchTree{[]matchTree{
104 substrMT("People"),
105 substrMT("barrabas"),
106 }}, false, false,
107 },
108 {"foo", substrMT("foo"), true, false},
109 {"foo", caseSensitiveSubstrMT("foo"), true, true},
110 {"(?i)foo", substrMT("FOO"), true, false},
111 {"(?i)foo", substrMT("FOO"), true, true},
112 {"^foo", substrMT("foo"), false, false},
113 {"(foo) (bar)", &andMatchTree{[]matchTree{substrMT("foo"), substrMT("bar")}}, false, false},
114 {"(thread|needle|haystack)", &orMatchTree{[]matchTree{
115 substrMT("thread"),
116 substrMT("needle"),
117 substrMT("haystack"),
118 }}, true, false},
119 {"(foo)(?-s:.)*?(bar)", &andLineMatchTree{andMatchTree{[]matchTree{
120 substrMT("foo"),
121 substrMT("bar"),
122 }}}, false, false},
123 {"(foo)(?-s:.)*?[[:space:]](?-s:.)*?(bar)", &andMatchTree{[]matchTree{
124 substrMT("foo"),
125 substrMT("bar"),
126 }}, false, false},
127 {"(foo){2,}", substrMT("foo"), false, false},
128 {"(...)(...)", &bruteForceMatchTree{}, false, false},
129 }
130
131 for _, c := range cases {
132 r, err := syntax.Parse(c.in, syntax.Perl)
133 if err != nil {
134 t.Errorf("Parse(%q): %v", c.in, err)
135 continue
136 }
137 d := indexData{}
138 q := query.Regexp{
139 Regexp: r,
140 CaseSensitive: c.caseSensitive,
141 }
142 gotQuery, isEq, _, _ := d.regexpToMatchTreeRecursive(q.Regexp, 3, q.FileName, q.CaseSensitive)
143 if !reflect.DeepEqual(c.query, gotQuery) {
144 printRegexp(t, r, 0)
145 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, gotQuery, c.query)
146 }
147 if isEq != c.isEquivalent {
148 printRegexp(t, r, 0)
149 t.Errorf("regexpToQuery(%q): got %v, want %v", c.in, isEq, c.isEquivalent)
150 }
151 }
152}
153
154func TestSearch_ShardRepoMaxMatchCountOpt(t *testing.T) {
155 cs := compoundReposShard(t, "foo", "bar")
156
157 ctx := context.Background()
158 q := &query.Const{Value: true}
159 opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1}
160
161 sr, err := cs.Search(ctx, q, opts)
162 if err != nil {
163 t.Fatal(err)
164 }
165
166 t.Run("matches", func(t *testing.T) {
167 var filenames []string
168 for _, r := range sr.Files {
169 filenames = append(filenames, r.FileName)
170 }
171
172 got, want := filenames, []string{"foo.txt", "bar.txt"}
173 if diff := cmp.Diff(want, got); diff != "" {
174 t.Errorf("mismatch (-want, +got): %s", diff)
175 }
176 })
177
178 t.Run("stats", func(t *testing.T) {
179 got, want := sr.Stats, zoekt.Stats{
180 ContentBytesLoaded: 0,
181 FileCount: 2,
182 FilesConsidered: 2,
183 FilesSkipped: 2,
184 ShardsScanned: 1,
185 MatchCount: 2,
186 }
187 if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" {
188 t.Errorf("mismatch (-want, +got): %s", diff)
189 }
190 })
191}
192
193func compoundReposShard(t *testing.T, names ...string) *indexData {
194 t.Helper()
195
196 repos := make([]*zoekt.Repository, 0, len(names))
197 docs := make([][]Document, 0, len(names))
198 for _, name := range names {
199 repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name})
200 ds := []Document{
201 {Name: name + ".txt", Content: []byte(name + " content")},
202 {Name: name + ".2.txt", Content: []byte(name + " content 2")},
203 }
204 docs = append(docs, ds)
205 }
206
207 b := testShardBuilderCompound(t, repos, docs)
208 s := searcherForTest(t, b)
209 return s.(*indexData)
210}
211
212func TestSimplifyRepoSet(t *testing.T) {
213 d := compoundReposShard(t, "foo", "bar")
214 all := &query.RepoSet{Set: map[string]bool{"foo": true, "bar": true}}
215 some := &query.RepoSet{Set: map[string]bool{"foo": true, "banana": true}}
216 none := &query.RepoSet{Set: map[string]bool{"banana": true}}
217
218 got := d.simplify(all)
219 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
220 t.Fatalf("-want, +got:\n%s", d)
221 }
222
223 got = d.simplify(some)
224 if d := cmp.Diff(some, got); d != "" {
225 t.Fatalf("-want, +got:\n%s", d)
226 }
227
228 got = d.simplify(none)
229 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
230 t.Fatalf("-want, +got:\n%s", d)
231 }
232}
233
234func TestSimplifyRepoIDs(t *testing.T) {
235 d := compoundReposShard(t, "foo", "bar")
236 all := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("bar"))}
237 some := &query.RepoIDs{Repos: roaring.BitmapOf(hash("foo"), hash("banana"))}
238 none := &query.RepoIDs{Repos: roaring.BitmapOf(hash("banana"))}
239
240 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
241
242 got := d.simplify(all)
243 if d := cmp.Diff(&query.Const{Value: true}, got, tr); d != "" {
244 t.Fatalf("-want, +got:\n%s", d)
245 }
246
247 got = d.simplify(some)
248 if d := cmp.Diff(some, got, tr); d != "" {
249 t.Fatalf("-want, +got:\n%s", d)
250 }
251
252 got = d.simplify(none)
253 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
254 t.Fatalf("-want, +got:\n%s", d)
255 }
256}
257
258func TestSimplifyRepo(t *testing.T) {
259 re := func(pat string) *query.Repo {
260 t.Helper()
261 re, err := regexp.Compile(pat)
262 if err != nil {
263 t.Fatal(err)
264 }
265 return &query.Repo{
266 Regexp: re,
267 }
268 }
269 d := compoundReposShard(t, "foo", "fool")
270 cases := []struct {
271 name string
272 q query.Q
273 want query.Q
274 }{{
275 name: "all",
276 q: re("f.*"),
277 want: &query.Const{Value: true},
278 }, {
279 name: "some",
280 q: re("foo."),
281 want: re("foo."),
282 }, {
283 name: "none",
284 q: re("banana"),
285 want: &query.Const{Value: false},
286 }}
287
288 for _, tc := range cases {
289 got := d.simplify(tc.q)
290 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
291 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
292 }
293 }
294}
295
296func TestSimplifyRepoRegexp(t *testing.T) {
297 re := func(pat string) *query.RepoRegexp {
298 t.Helper()
299 re, err := regexp.Compile(pat)
300 if err != nil {
301 t.Fatal(err)
302 }
303 return &query.RepoRegexp{
304 Regexp: re,
305 }
306 }
307 d := compoundReposShard(t, "foo", "fool")
308 cases := []struct {
309 name string
310 q query.Q
311 want query.Q
312 }{{
313 name: "all",
314 q: re("f.*"),
315 want: &query.Const{Value: true},
316 }, {
317 name: "some",
318 q: re("foo."),
319 want: re("foo."),
320 }, {
321 name: "none",
322 q: re("banana"),
323 want: &query.Const{Value: false},
324 }}
325
326 for _, tc := range cases {
327 got := d.simplify(tc.q)
328 if d := cmp.Diff(tc.want.String(), got.String()); d != "" {
329 t.Errorf("%s: -want, +got:\n%s", tc.name, d)
330 }
331 }
332}
333
334func TestSimplifyRcRawConfig(t *testing.T) {
335 d := compoundReposShard(t, "foo", "bar")
336 var all = query.RcOnlyPrivate | query.RcNoForks | query.RcNoArchived
337
338 got := d.simplify(all)
339 if d := cmp.Diff(&query.Const{Value: true}, got); d != "" {
340 t.Fatalf("-want, +got:\n%s", d)
341 }
342
343 var none = query.RcOnlyPublic | query.RcNoForks | query.RcNoArchived
344 got = d.simplify(none)
345 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
346 t.Fatalf("-want, +got:\n%s", d)
347 }
348}
349
350func TestSimplifyBranchesRepos(t *testing.T) {
351 d := compoundReposShard(t, "foo", "bar")
352
353 some := &query.BranchesRepos{
354 List: []query.BranchRepos{
355 {Branch: "branch1", Repos: roaring.BitmapOf(hash("bar"))},
356 },
357 }
358 none := &query.Repo{Regexp: regexp.MustCompile("banana")}
359
360 got := d.simplify(some)
361 tr := cmp.Transformer("", func(b *roaring.Bitmap) []uint32 { return b.ToArray() })
362 if d := cmp.Diff(some, got, tr); d != "" {
363 t.Fatalf("-want, +got:\n%s", d)
364 }
365
366 got = d.simplify(none)
367 if d := cmp.Diff(&query.Const{Value: false}, got); d != "" {
368 t.Fatalf("-want, +got:\n%s", d)
369 }
370}
371
372func hash(name string) uint32 {
373 h := fnv.New32()
374 h.Write([]byte(name))
375 return h.Sum32()
376}
377
378func TestGatherBranches(t *testing.T) {
379 content := []byte("dummy")
380 b := testShardBuilder(t, &zoekt.Repository{
381 Branches: []zoekt.RepositoryBranch{
382 {"foo", "v1"},
383 {"foo-2", "v1"},
384 {"main", "v1"},
385 {"bar", "v1"},
386 {"quz", "v1"},
387 },
388 },
389 Document{Name: "f1", Content: content, Branches: []string{"foo", "bar", "quz"}},
390 Document{Name: "f2", Content: content, Branches: []string{"foo", "foo-2"}},
391 Document{Name: "f3", Content: content, Branches: []string{"main"}})
392
393 d := searcherForTest(t, b).(*indexData)
394
395 sr, err := d.Search(
396 context.Background(),
397 &query.Or{Children: []query.Q{
398 &query.Branch{Pattern: "foo"},
399 &query.Branch{Pattern: "quz"},
400 }},
401 &zoekt.SearchOptions{},
402 )
403 if err != nil {
404 t.Fatal(err)
405 }
406
407 want := map[string][]string{
408 "f1": {"foo", "quz"},
409 "f2": {"foo", "foo-2"},
410 }
411
412 if len(sr.Files) != 2 {
413 t.Fatalf("len(sr.Files): want %d, got %d", 2, len(sr.Files))
414 }
415
416 for _, f := range sr.Files {
417 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
418 t.Fatalf("-want,+got:\n%s", d)
419 }
420 }
421}
422
423func TestGatherBranchesMany(t *testing.T) {
424 content := []byte("dummy")
425 manyBranchNames := []string{}
426 manyBranches := []zoekt.RepositoryBranch{}
427 for i := range 64 {
428 branchName := "branch-" + strconv.Itoa(i)
429 manyBranchNames = append(manyBranchNames, branchName)
430 manyBranches = append(manyBranches, zoekt.RepositoryBranch{
431 Name: branchName,
432 Version: "v1"})
433 }
434 b := testShardBuilder(t, &zoekt.Repository{
435 Branches: manyBranches,
436 }, Document{Name: "f1", Content: content, Branches: manyBranchNames})
437
438 d := searcherForTest(t, b).(*indexData)
439
440 sr, err := d.Search(
441 context.Background(),
442 &query.Substring{
443 Pattern: "dummy",
444 CaseSensitive: false,
445 },
446 &zoekt.SearchOptions{},
447 )
448 if err != nil {
449 t.Fatal(err)
450 }
451
452 want := map[string][]string{
453 "f1": manyBranchNames,
454 }
455
456 if len(sr.Files) != 1 {
457 t.Fatalf("len(sr.Files): want %d, got %d", 1, len(sr.Files))
458 }
459
460 for _, f := range sr.Files {
461 if d := cmp.Diff(want[f.FileName], f.Branches); d != "" {
462 t.Fatalf("-want,+got:\n%s", d)
463 }
464 }
465}
466
467func TestSimplifyMeta(t *testing.T) {
468 re := regexp.MustCompile("^stable$")
469 d := compoundReposShard(t, "foo", "bar")
470
471 // Inject metadata into the fake repos
472 d.repoMetaData[0].Metadata = map[string]string{"release": "stable"}
473 d.repoMetaData[1].Metadata = map[string]string{"release": "beta"}
474
475 all := &query.Meta{Field: "release", Value: regexp.MustCompile(".*")}
476 some := &query.Meta{Field: "release", Value: re}
477 none := &query.Meta{Field: "release", Value: regexp.MustCompile("^nonexistent$")}
478
479 if got := d.simplify(all); !reflect.DeepEqual(got, &query.Const{Value: true}) {
480 t.Errorf("simplify(all): got %v, want Const(true)", got)
481 }
482
483 if got := d.simplify(some); got != some {
484 t.Errorf("simplify(some): got %v, want unchanged", got)
485 }
486
487 if got := d.simplify(none); !reflect.DeepEqual(got, &query.Const{Value: false}) {
488 t.Errorf("simplify(none): got %v, want Const(false)", got)
489 }
490}