fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "hash/fnv"
22 "log"
23 "math"
24 "os"
25 "reflect"
26 "runtime"
27 "sort"
28 "strconv"
29 "testing"
30 "testing/quick"
31 "time"
32
33 "github.com/RoaringBitmap/roaring"
34 "github.com/google/go-cmp/cmp"
35 "github.com/google/go-cmp/cmp/cmpopts"
36 "github.com/grafana/regexp"
37
38 "github.com/sourcegraph/zoekt"
39 "github.com/sourcegraph/zoekt/query"
40)
41
42type crashSearcher struct{}
43
44func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
45 panic("search")
46}
47
48func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
49 panic("list")
50}
51
52func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
53 return &zoekt.RepoStats{}, nil
54}
55
56func (s *crashSearcher) Close() {}
57
58func (s *crashSearcher) String() string { return "crashSearcher" }
59
60func TestCrashResilience(t *testing.T) {
61 out := &bytes.Buffer{}
62 log.SetOutput(out)
63 defer log.SetOutput(os.Stderr)
64 ss := newShardedSearcher(2)
65 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
66
67 var wantCrashes int
68 test := func(t *testing.T) {
69 q := &query.Substring{Pattern: "hoi"}
70 opts := &zoekt.SearchOptions{}
71 if res, err := ss.Search(context.Background(), q, opts); err != nil {
72 t.Fatalf("Search: %v", err)
73 } else if res.Stats.Crashes != wantCrashes {
74 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
75 }
76
77 if res, err := ss.List(context.Background(), q, nil); err != nil {
78 t.Fatalf("List: %v", err)
79 } else if res.Crashes != wantCrashes {
80 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
81 }
82 }
83
84 // Before we are marked as ready we have one extra crash
85 wantCrashes = 2
86 t.Run("loading", test)
87
88 // After marking as ready we should only have the crashSearcher
89 // contributing.
90 ss.markReady()
91 wantCrashes = 1
92 t.Run("ready", test)
93}
94
95type rankSearcher struct {
96 rank uint16
97 repo *zoekt.Repository
98}
99
100func (s *rankSearcher) Close() {
101}
102
103func (s *rankSearcher) String() string {
104 return ""
105}
106
107func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
108 select {
109 case <-ctx.Done():
110 return &zoekt.SearchResult{}, nil
111 default:
112 }
113
114 // Ugly, but without sleep it's too fast, and we can't
115 // simulate the cutoff.
116 time.Sleep(time.Millisecond)
117 return &zoekt.SearchResult{
118 Files: []zoekt.FileMatch{
119 {
120 FileName: fmt.Sprintf("f%d", s.rank),
121 Score: float64(s.rank),
122 },
123 },
124 Stats: zoekt.Stats{
125 MatchCount: 1,
126 },
127 }, nil
128}
129
130func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
131 r := zoekt.Repository{}
132 if s.repo != nil {
133 r = *s.repo
134 }
135 r.Rank = s.rank
136 return &zoekt.RepoList{
137 Repos: []*zoekt.RepoListEntry{
138 {Repository: r},
139 },
140 }, nil
141}
142
143func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
144
145func TestOrderByShard(t *testing.T) {
146 ss := newShardedSearcher(1)
147
148 n := 10 * runtime.GOMAXPROCS(0)
149 for i := 0; i < n; i++ {
150 ss.replace(map[string]zoekt.Searcher{
151 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
152 })
153 }
154
155 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
156 t.Errorf("Search: %v", err)
157 } else if len(res.Files) != n {
158 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
159 }
160
161 opts := zoekt.SearchOptions{
162 TotalMaxMatchCount: 3,
163 }
164 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
165 if err != nil {
166 t.Errorf("Search: %v", err)
167 }
168
169 if len(res.Files) < opts.TotalMaxMatchCount {
170 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
171 }
172 if len(res.Files) == n {
173 t.Errorf("got %d results, want < %d", len(res.Files), n)
174 }
175 for i, f := range res.Files {
176 rev := n - 1 - i
177 want := fmt.Sprintf("f%d", rev)
178 got := f.FileName
179
180 if got != want {
181 t.Logf("%d: got %q, want %q", i, got, want)
182 }
183 }
184}
185
186func TestShardedSearcher_Ranking(t *testing.T) {
187 ss := newShardedSearcher(1)
188
189 var nextShardNum int
190 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
191 r := &zoekt.Repository{ID: hash(repo), Name: repo}
192 r.RawConfig = map[string]string{
193 "public": "1",
194 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
195 }
196 b := testIndexBuilder(t, r, docs...)
197 shard := searcherForTest(t, b)
198 ss.replace(map[string]zoekt.Searcher{
199 fmt.Sprintf("key-%d", nextShardNum): shard,
200 })
201 nextShardNum++
202 }
203
204 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
205 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
206 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
207 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
208
209 want := []string{
210 "super-star",
211 "moderately-popular",
212 "weekend-project",
213 "weekend-project-2",
214 }
215
216 var have []string
217 for _, s := range ss.getLoaded().shards {
218 for _, r := range s.repos {
219 have = append(have, r.Name)
220 }
221 }
222
223 if !reflect.DeepEqual(want, have) {
224 t.Fatalf("\nwant: %s\nhave: %s", want, have)
225 }
226}
227
228func TestShardedSearcher_DocumentRanking(t *testing.T) {
229 ss := newShardedSearcher(1)
230
231 var nextShardNum int
232 addShard := func(repo string, rank uint16, docs ...zoekt.Document) {
233 r := &zoekt.Repository{ID: hash(repo), Name: repo}
234 r.RawConfig = map[string]string{
235 "public": "1",
236 }
237 r.Rank = rank
238 b := testIndexBuilder(t, r, docs...)
239 shard := searcherForTest(t, b)
240 ss.replace(map[string]zoekt.Searcher{
241 fmt.Sprintf("key-%d", nextShardNum): shard,
242 })
243 nextShardNum++
244 }
245
246 addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")})
247 addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")})
248 addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
249 addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")},
250 zoekt.Document{Name: "f5", Content: []byte("fooooo")})
251
252 // Run a stream search and gather the results
253 var results []*zoekt.SearchResult
254 opts := &zoekt.SearchOptions{
255 UseDocumentRanks: true,
256 FlushWallTime: 100 * time.Millisecond,
257 }
258
259 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
260 zoekt.SenderFunc(func(event *zoekt.SearchResult) {
261 results = append(results, event)
262 }))
263 if err != nil {
264 t.Fatal(err)
265 }
266
267 // There should always be two stream results, first progress-only, then the file results
268 if len(results) != 2 {
269 t.Fatalf("expected 2 streamed results, but got %d", len(results))
270 }
271
272 // The ranking should be determined by whether it's an exact word match,
273 // followed by repository priority
274 want := []string{"f4", "f3", "f5", "f2", "f1"}
275
276 files := results[1].Files
277 got := make([]string, len(files))
278 for i := 0; i < len(files); i++ {
279 got[i] = files[i].FileName
280 }
281
282 if !reflect.DeepEqual(got, want) {
283 t.Errorf("got %v, want %v", got, want)
284 }
285}
286
287func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
288 ss := newShardedSearcher(1)
289
290 // namePrefix is so we can create a repo:foo filter and match the same set
291 // of repos.
292 namePrefix := [3]string{"foo", "bar", "baz"}
293
294 repoSetNames := []string{}
295 repoIDs := []uint32{}
296 n := 10 * runtime.GOMAXPROCS(0)
297 for i := 0; i < n; i++ {
298 shardName := fmt.Sprintf("shard%d", i)
299 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i)
300 repoID := hash(repoName)
301
302 if i%3 == 0 {
303 repoSetNames = append(repoSetNames, repoName)
304 repoIDs = append(repoIDs, repoID)
305 }
306
307 ss.replace(map[string]zoekt.Searcher{
308 shardName: &rankSearcher{
309 repo: &zoekt.Repository{ID: repoID, Name: repoName},
310 rank: uint16(n - i),
311 },
312 })
313 }
314
315 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
316 if err != nil {
317 t.Errorf("Search: %v", err)
318 }
319 if len(res.Files) != n {
320 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
321 }
322
323 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
324 {Branch: "HEAD", Repos: roaring.New()},
325 }}
326
327 for _, name := range repoSetNames {
328 branchesRepos.List[0].Repos.Add(hash(name))
329 }
330
331 set := query.NewRepoSet(repoSetNames...)
332 sub := &query.Substring{Pattern: "bla"}
333
334 repoIDsQuery := query.NewRepoIDs(repoIDs...)
335 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")}
336
337 queries := []query.Q{
338 query.NewAnd(set, sub),
339 // Test with the same reposet again
340 query.NewAnd(set, sub),
341
342 query.NewAnd(branchesRepos, sub),
343 // Test with the same repoBranches with IDs again
344 query.NewAnd(branchesRepos, sub),
345
346 query.NewAnd(repoIDsQuery, sub),
347 // Test with the same repoIDs again
348 query.NewAnd(repoIDsQuery, sub),
349
350 query.NewAnd(repoQuery, sub),
351 query.NewAnd(repoQuery, sub),
352
353 // List has queries which are just the reposet atoms. We also test twice.
354 set,
355 set,
356 branchesRepos,
357 branchesRepos,
358 repoIDsQuery,
359 repoIDsQuery,
360 repoQuery,
361 repoQuery,
362 }
363
364 for _, q := range queries {
365 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
366 if err != nil {
367 t.Errorf("Search(%s): %v", q, err)
368 }
369 // Note: Assertion is based on fact that `rankSearcher` always returns a
370 // result and using repoSet will half the number of results
371 if len(res.Files) != len(repoSetNames) {
372 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
373 }
374 }
375}
376
377func hash(name string) uint32 {
378 h := fnv.New32()
379 h.Write([]byte(name))
380 return h.Sum32()
381}
382
383type memSeeker struct {
384 data []byte
385}
386
387func (s *memSeeker) Name() string {
388 return "memseeker"
389}
390
391func (s *memSeeker) Close() {}
392func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
393 return s.data[off : off+sz], nil
394}
395
396func (s *memSeeker) Size() (uint32, error) {
397 return uint32(len(s.data)), nil
398}
399
400func TestUnloadIndex(t *testing.T) {
401 b := testIndexBuilder(t, nil, zoekt.Document{
402 Name: "filename",
403 Content: []byte("needle needle needle"),
404 })
405
406 var buf bytes.Buffer
407 if err := b.Write(&buf); err != nil {
408 t.Fatal(err)
409 }
410 indexBytes := buf.Bytes()
411 indexFile := &memSeeker{indexBytes}
412 searcher, err := zoekt.NewSearcher(indexFile)
413 if err != nil {
414 t.Fatalf("NewSearcher: %v", err)
415 }
416
417 ss := newShardedSearcher(2)
418 ss.replace(map[string]zoekt.Searcher{"key": searcher})
419
420 var opts zoekt.SearchOptions
421 q := &query.Substring{Pattern: "needle"}
422 res, err := ss.Search(context.Background(), q, &opts)
423 if err != nil {
424 t.Fatalf("Search(%s): %v", q, err)
425 }
426
427 forbidden := byte(29)
428 for i := range indexBytes {
429 // non-ASCII
430 indexBytes[i] = forbidden
431 }
432
433 for _, f := range res.Files {
434 if bytes.Contains(f.Content, []byte{forbidden}) {
435 t.Errorf("found %d in content %q", forbidden, f.Content)
436 }
437 if bytes.Contains(f.Checksum, []byte{forbidden}) {
438 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
439 }
440
441 for _, l := range f.LineMatches {
442 if bytes.Contains(l.Line, []byte{forbidden}) {
443 t.Errorf("found %d in line %q", forbidden, l.Line)
444 }
445 }
446 }
447}
448
449func TestShardedSearcher_List(t *testing.T) {
450 repos := []*zoekt.Repository{
451 {
452 ID: 1234,
453 Name: "repo-a",
454 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
455 RawConfig: map[string]string{"repoid": "1234"},
456 },
457 {
458 Name: "repo-b",
459 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
460 },
461 }
462
463 doc := zoekt.Document{
464 Name: "foo.go",
465 Content: []byte("bar\nbaz"),
466 Branches: []string{"main", "dev"},
467 }
468
469 // Test duplicate removal when ListOptions.Minimal is true and false
470 ss := newShardedSearcher(4)
471 ss.replace(map[string]zoekt.Searcher{
472 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)),
473 "2": searcherForTest(t, testIndexBuilder(t, repos[0])),
474 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)),
475 "4": searcherForTest(t, testIndexBuilder(t, repos[1])),
476 })
477 ss.markReady()
478
479 stats := zoekt.RepoStats{
480 Shards: 2,
481 Documents: 1,
482 IndexBytes: 196,
483 ContentBytes: 13,
484 NewLinesCount: 1,
485 DefaultBranchNewLinesCount: 1,
486 OtherBranchesNewLinesCount: 1,
487 }
488
489 aggStats := stats
490 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
491 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
492
493 for _, tc := range []struct {
494 name string
495 opts *zoekt.ListOptions
496 want *zoekt.RepoList
497 }{
498 {
499 name: "nil opts",
500 opts: nil,
501 want: &zoekt.RepoList{
502 Repos: []*zoekt.RepoListEntry{
503 {
504 Repository: *repos[0],
505 Stats: stats,
506 },
507 {
508 Repository: *repos[1],
509 Stats: stats,
510 },
511 },
512 Stats: aggStats,
513 },
514 },
515 {
516 name: "default",
517 opts: &zoekt.ListOptions{},
518 want: &zoekt.RepoList{
519 Repos: []*zoekt.RepoListEntry{
520 {
521 Repository: *repos[0],
522 Stats: stats,
523 },
524 {
525 Repository: *repos[1],
526 Stats: stats,
527 },
528 },
529 Stats: aggStats,
530 },
531 },
532 {
533 name: "field=repos",
534 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
535 want: &zoekt.RepoList{
536 Repos: []*zoekt.RepoListEntry{
537 {
538 Repository: *repos[0],
539 Stats: stats,
540 },
541 {
542 Repository: *repos[1],
543 Stats: stats,
544 },
545 },
546 Stats: aggStats,
547 },
548 },
549 {
550 name: "field=reposmap",
551 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
552 want: &zoekt.RepoList{
553 Repos: []*zoekt.RepoListEntry{
554 {
555 Repository: *repos[1],
556 Stats: stats,
557 },
558 },
559 ReposMap: zoekt.ReposMap{
560 repos[0].ID: {
561 HasSymbols: repos[0].HasSymbols,
562 Branches: repos[0].Branches,
563 },
564 },
565 Stats: aggStats,
566 },
567 },
568 } {
569 tc := tc
570 t.Run(tc.name, func(t *testing.T) {
571 t.Parallel()
572
573 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
574
575 res, err := ss.List(context.Background(), q, tc.opts)
576 if err != nil {
577 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
578 }
579
580 sort.Slice(res.Repos, func(i, j int) bool {
581 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
582 })
583
584 ignored := []cmp.Option{
585 cmpopts.EquateEmpty(),
586 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
587 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
588 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
589 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
590 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
591 }
592
593 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
594 t.Fatalf("mismatch (-want +got):\n%s", diff)
595 }
596 })
597 }
598}
599
600func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder {
601 b, err := zoekt.NewIndexBuilder(repo)
602 if err != nil {
603 t.Fatalf("NewIndexBuilder: %v", err)
604 }
605
606 for i, d := range docs {
607 if err := b.Add(d); err != nil {
608 t.Fatalf("Add %d: %v", i, err)
609 }
610 }
611 return b
612}
613
614func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher {
615 var buf bytes.Buffer
616 if err := b.Write(&buf); err != nil {
617 t.Fatal(err)
618 }
619 f := &memSeeker{buf.Bytes()}
620
621 searcher, err := zoekt.NewSearcher(f)
622 if err != nil {
623 t.Fatalf("NewSearcher: %v", err)
624 }
625
626 return searcher
627}
628
629func reposForTest(n int) (result []*zoekt.Repository) {
630 for i := 0; i < n; i++ {
631 result = append(result, &zoekt.Repository{
632 ID: uint32(i + 1),
633 Name: fmt.Sprintf("test-repository-%d", i),
634 })
635 }
636 return result
637}
638
639func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
640 builder := testIndexBuilder(b, r)
641
642 if err := builder.Add(zoekt.Document{
643 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
644 Content: []byte("needle needle needle haystack"),
645 }); err != nil {
646 b.Fatal(err)
647 }
648
649 for i := 1; i < numFiles; i++ {
650 if err := builder.Add(zoekt.Document{
651 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
652 Content: []byte("haystack haystack haystack"),
653 }); err != nil {
654 b.Fatal(err)
655 }
656 }
657
658 return searcherForTest(b, builder)
659}
660
661func BenchmarkShardedSearch(b *testing.B) {
662 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
663
664 filesPerRepo := 300
665 repos := reposForTest(3000)
666 var repoSetIDs []uint32
667
668 shards := make(map[string]zoekt.Searcher, len(repos))
669 for i, r := range repos {
670 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
671 if i%2 == 0 {
672 repoSetIDs = append(repoSetIDs, r.ID)
673 }
674 }
675
676 ss.replace(shards)
677
678 ctx := context.Background()
679 opts := &zoekt.SearchOptions{}
680
681 needleSub := &query.Substring{Pattern: "needle"}
682 haystackSub := &query.Substring{Pattern: "haystack"}
683 helloworldSub := &query.Substring{Pattern: "helloworld"}
684 haystackCap, err := query.Parse("hay(s(t))(a)ck")
685 if err != nil {
686 b.Fatal(err)
687 }
688
689 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
690 if err != nil {
691 b.Fatal(err)
692 }
693
694 setAnd := func(q query.Q) func() query.Q {
695 return func() query.Q {
696 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
697 }
698 }
699
700 search := func(b *testing.B, q query.Q, wantFiles int) {
701 b.Helper()
702
703 res, err := ss.Search(ctx, q, opts)
704 if err != nil {
705 b.Fatalf("Search(%s): %v", q, err)
706 }
707 if have := len(res.Files); have != wantFiles {
708 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
709 }
710 }
711
712 benchmarks := []struct {
713 name string
714 q func() query.Q
715 wantFiles int
716 }{
717 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
718 {"substring no results", func() query.Q { return helloworldSub }, 0},
719 {"substring some results", func() query.Q { return needleSub }, len(repos)},
720
721 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
722 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
723
724 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
725 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
726 {"substring no results and repo set", setAnd(helloworldSub), 0},
727 }
728
729 for _, bb := range benchmarks {
730 b.Run(bb.name, func(b *testing.B) {
731 b.ReportAllocs()
732
733 for n := 0; n < b.N; n++ {
734 q := bb.q()
735
736 search(b, q, bb.wantFiles)
737 }
738 })
739 }
740}
741
742func TestRawQuerySearch(t *testing.T) {
743 ss := newShardedSearcher(1)
744
745 var nextShardNum int
746 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) {
747 r := &zoekt.Repository{Name: repo}
748 r.RawConfig = rawConfig
749 b := testIndexBuilder(t, r, docs...)
750 shard := searcherForTest(t, b)
751 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
752 nextShardNum++
753 }
754 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
755 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
756 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
757
758 cases := []struct {
759 pattern string
760 flags query.RawConfig
761 wantRepos []string
762 wantFiles int
763 }{
764 {
765 pattern: "bas",
766 flags: query.RcOnlyPublic,
767 wantRepos: []string{"public"},
768 wantFiles: 1,
769 },
770 {
771 pattern: "foo",
772 flags: query.RcOnlyPublic,
773 wantRepos: []string{"public", "public_fork"},
774 wantFiles: 2,
775 },
776 {
777 pattern: "foo",
778 flags: query.RcOnlyPublic | query.RcNoForks,
779 wantRepos: []string{"public"},
780 wantFiles: 1,
781 },
782 {
783 pattern: "bar",
784 flags: query.RcOnlyForks,
785 wantRepos: []string{"public_fork"},
786 wantFiles: 1,
787 },
788 {
789 pattern: "bas",
790 flags: query.RcNoArchived,
791 wantRepos: []string{"public"},
792 wantFiles: 1,
793 },
794 {
795 pattern: "foo",
796 flags: query.RcNoForks,
797 wantRepos: []string{"public", "private_archived"},
798 wantFiles: 2,
799 },
800 {
801 pattern: "bas",
802 flags: query.RcOnlyArchived,
803 wantRepos: []string{"private_archived"},
804 wantFiles: 1,
805 },
806 {
807 pattern: "foo",
808 flags: query.RcOnlyPrivate,
809 wantRepos: []string{"private_archived"},
810 wantFiles: 1,
811 },
812 {
813 pattern: "foo",
814 flags: query.RcOnlyPrivate | query.RcNoArchived,
815 wantRepos: []string{},
816 wantFiles: 0,
817 },
818 }
819 for _, c := range cases {
820 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
821 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
822
823 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
824 if err != nil {
825 t.Fatal(err)
826 }
827
828 if got := len(sr.Files); got != c.wantFiles {
829 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
830 }
831
832 if c.wantFiles == 0 {
833 return
834 }
835
836 gotRepos := make([]string, 0, len(sr.RepoURLs))
837 for k := range sr.RepoURLs {
838 gotRepos = append(gotRepos, k)
839 }
840 sort.Strings(gotRepos)
841 sort.Strings(c.wantRepos)
842 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
843 t.Fatalf("(-want, +got):\n%s", d)
844 }
845 })
846 }
847}
848
849func TestPrioritySlice(t *testing.T) {
850 p := &prioritySlice{}
851 for step, oper := range []struct {
852 isAppend bool
853 value float64
854 expectedMax float64
855 }{
856 {true, 1, 1},
857 {true, 3, 3},
858 {true, 2, 3},
859 {false, 1, 3},
860 {false, 3, 2},
861 {false, 2, math.Inf(-1)},
862 } {
863 if oper.isAppend {
864 p.append(oper.value)
865 } else {
866 p.remove(oper.value)
867 }
868 max := p.max()
869 if max != oper.expectedMax {
870 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
871 }
872 }
873}
874
875func TestSendByRepository(t *testing.T) {
876 wantStats := zoekt.Stats{}
877 wantStats.ShardsScanned = 1
878
879 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
880 // test.
881 f := func(n1, n2, n3 uint8) bool {
882 sr := createMockSearchResult(n1, n2, n3, wantStats)
883
884 mock := &mockSender{}
885 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
886
887 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
888 t.Logf("-want,+got\n%s", diff)
889 return false
890 }
891
892 nonZero := 0
893 for _, l := range []uint8{n1, n2, n3} {
894 if l > 0 {
895 nonZero++
896 }
897 }
898 if l := len(mock.files); l != nonZero {
899 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
900 return false
901 }
902
903 gotTotal := 0
904 for _, fs := range mock.files {
905 gotTotal += len(fs)
906 }
907 wantTotal := int(n1) + int(n2) + int(n3)
908 if gotTotal != wantTotal {
909 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
910 return false
911 }
912
913 for _, fs := range mock.files {
914 if len(fs) == 0 {
915 t.Logf("got search result with 0 file matches after split")
916 return false
917 }
918 }
919 return true
920 }
921
922 if err := quick.Check(f, nil); err != nil {
923 t.Error(err)
924 }
925}
926
927type mockSender struct {
928 stats zoekt.Stats
929 files [][]zoekt.FileMatch
930}
931
932func (s *mockSender) Send(sr *zoekt.SearchResult) {
933 s.stats.Add(sr.Stats)
934 if len(sr.Files) == 0 {
935 return
936 }
937 s.files = append(s.files, sr.Files)
938}
939
940func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
941 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
942 for i, n := range []uint8{n1, n2, n3} {
943 if n == 0 {
944 continue
945 }
946 tmp := mkSearchResult(int(n), uint32(i))
947 sr.Files = append(sr.Files, tmp.Files...)
948 for k := range tmp.RepoURLs {
949 sr.RepoURLs[k] = ""
950 }
951 }
952 sr.Stats = stats
953 return sr
954}
955
956func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
957 if n == 0 {
958 return &zoekt.SearchResult{}
959 }
960 fm := make([]zoekt.FileMatch, 0, n)
961 for i := 0; i < n; i++ {
962 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
963 }
964
965 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
966}
967
968func TestFileBasedSearch(t *testing.T) {
969 cases := []struct {
970 name string
971 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
972 }{
973 {"Search", testShardedSearch},
974 {"StreamSearch", testShardedStreamSearch},
975 }
976
977 c1 := []byte("I love bananas without skin")
978 // -----------0123456789012345678901234567890123456789
979 c2 := []byte("In Dutch, ananas means pineapple")
980 // -----------0123456789012345678901234567890123456789
981 b := testIndexBuilder(t, nil,
982 zoekt.Document{Name: "f1", Content: c1},
983 zoekt.Document{Name: "f2", Content: c2},
984 )
985
986 for _, tt := range cases {
987 for _, useDocumentRanks := range []bool{false, true} {
988 t.Run(tt.name, func(t *testing.T) {
989 matches := tt.testShardedSearch(t, &query.Substring{
990 CaseSensitive: false,
991 Pattern: "ananas",
992 }, b, useDocumentRanks)
993
994 if len(matches) != 2 {
995 t.Fatalf("got %v, want 2 matches", matches)
996 }
997 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
998 t.Fatalf("got %v, want matches {f1,f2}", matches)
999 }
1000 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1001 t.Fatalf("got %#v, want offsets 10,8", matches)
1002 }
1003 })
1004 }
1005 }
1006}
1007
1008func TestWordBoundaryRanking(t *testing.T) {
1009 cases := []struct {
1010 name string
1011 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1012 }{
1013 {"Search", testShardedSearch},
1014 {"StreamSearch", testShardedStreamSearch},
1015 }
1016
1017 b := testIndexBuilder(t, nil,
1018 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1019 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1020 // -----------------------------------------0123456 789012 34567890
1021 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1022
1023 for _, tt := range cases {
1024 for _, useDocumentRanks := range []bool{false, true} {
1025 t.Run(tt.name, func(t *testing.T) {
1026 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1027
1028 if len(files) != 3 {
1029 t.Fatalf("got %#v, want 3 files", files)
1030 }
1031
1032 file0 := files[0]
1033 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1034 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1035 }
1036
1037 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1038 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1039 }
1040 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1041 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1042 }
1043 })
1044 }
1045 }
1046}
1047
1048func TestAtomCountScore(t *testing.T) {
1049 cases := []struct {
1050 name string
1051 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1052 }{
1053 {"Search", testShardedSearch},
1054 {"StreamSearch", testShardedStreamSearch},
1055 }
1056
1057 b := testIndexBuilder(t,
1058 &zoekt.Repository{
1059 Branches: []zoekt.RepositoryBranch{
1060 {Name: "branches", Version: "v1"},
1061 {Name: "needle", Version: "v2"},
1062 },
1063 },
1064 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1065 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1066 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1067
1068 for _, tt := range cases {
1069 for _, useDocumentRanks := range []bool{false, true} {
1070 t.Run(tt.name, func(t *testing.T) {
1071 files := tt.testShardedSearch(t,
1072 query.NewOr(
1073 &query.Substring{Pattern: "needle"},
1074 &query.Substring{Pattern: "needle", FileName: true},
1075 &query.Branch{Pattern: "needle"},
1076 ), b, useDocumentRanks)
1077 var got []string
1078 for _, f := range files {
1079 got = append(got, f.FileName)
1080 }
1081 want := []string{"needle-file-branch", "needle-file", "f1"}
1082 if !reflect.DeepEqual(got, want) {
1083 t.Errorf("got %v, want %v", got, want)
1084 }
1085 })
1086 }
1087 }
1088}
1089
1090func TestUseBM25Scoring(t *testing.T) {
1091 b := testIndexBuilder(t,
1092 &zoekt.Repository{},
1093 zoekt.Document{Name: "f1", Content: []byte("one two two three")},
1094 zoekt.Document{Name: "f2", Content: []byte("one two one two")},
1095 zoekt.Document{Name: "f3", Content: []byte("one three three three")})
1096
1097 ss := newShardedSearcher(1)
1098 searcher := searcherForTest(t, b)
1099 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1100
1101 q := query.NewOr(
1102 &query.Substring{Pattern: "one"},
1103 &query.Substring{Pattern: "three"})
1104
1105 opts := zoekt.SearchOptions{
1106 UseBM25Scoring: true,
1107 }
1108
1109 results, err := ss.Search(context.Background(), q, &opts)
1110 if err != nil {
1111 t.Fatal(err)
1112 }
1113
1114 var got []string
1115 for _, f := range results.Files {
1116 got = append(got, f.FileName)
1117 }
1118
1119 want := []string{"f3", "f1", "f2"}
1120 if !reflect.DeepEqual(got, want) {
1121 t.Errorf("got %v, want %v", got, want)
1122 }
1123}
1124
1125func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1126 ss := newShardedSearcher(1)
1127 searcher := searcherForTest(t, ib)
1128 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1129
1130 var files []zoekt.FileMatch
1131 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) {
1132 files = append(files, result.Files...)
1133 })
1134
1135 opts := zoekt.SearchOptions{}
1136 if useDocumentRanks {
1137 opts.UseDocumentRanks = true
1138 opts.FlushWallTime = 10 * time.Millisecond
1139 }
1140 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1141 t.Fatal(err)
1142 }
1143 return files
1144}
1145
1146func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1147 ss := newShardedSearcher(1)
1148 searcher := searcherForTest(t, ib)
1149 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1150
1151 opts := zoekt.SearchOptions{}
1152 if useDocumentRanks {
1153 opts.UseDocumentRanks = true
1154 opts.FlushWallTime = 50 * time.Millisecond
1155 }
1156 sres, _ := ss.Search(context.Background(), q, &opts)
1157 return sres.Files
1158}
1159
1160// Ensure we work on empty shard directories.
1161func TestNewDirectorySearcher_empty(t *testing.T) {
1162 ctx := context.Background()
1163
1164 test := func(t *testing.T, ss zoekt.Streamer) {
1165 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1166 if err != nil {
1167 t.Fatal("Search non-nil error", err)
1168 }
1169
1170 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1171 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1172 }
1173
1174 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1175 if err != nil {
1176 t.Fatal("List non-nil error", err)
1177 }
1178 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1179 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1180 }
1181 }
1182
1183 dir := t.TempDir()
1184 t.Run("blocking", func(t *testing.T) {
1185 ss, err := NewDirectorySearcher(dir)
1186 if err != nil {
1187 t.Fatal(err)
1188 }
1189 t.Cleanup(ss.Close)
1190
1191 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1192 // so we can validate straight away.
1193 test(t, ss)
1194 })
1195
1196 t.Run("fast", func(t *testing.T) {
1197 ss, err := NewDirectorySearcherFast(dir)
1198 if err != nil {
1199 t.Fatal(err)
1200 }
1201 t.Cleanup(ss.Close)
1202
1203 deadline := testDeadline(t, 10*time.Second)
1204
1205 // Wait for scanning of directory to be done. We should be returning
1206 // non-zero crashes until then.
1207 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1208 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1209 if err != nil {
1210 t.Fatal(err)
1211 }
1212 return res.Stats.Crashes == 0
1213 })
1214
1215 test(t, ss)
1216 })
1217}
1218
1219// testDeadline returns the deadline for t, but ensures it is no longer than
1220// maxTimeout away.
1221func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1222 deadline := time.Now().Add(maxTimeout)
1223 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1224 // give 1s for us to do a final test run
1225 deadline = d.Add(-time.Second)
1226 }
1227 return deadline
1228}
1229
1230func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1231 for time.Now().Before(deadline) {
1232 if pred() {
1233 return true
1234 }
1235
1236 time.Sleep(tick)
1237 }
1238
1239 return pred()
1240}