fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "flag"
21 "fmt"
22 "hash/fnv"
23 "io"
24 "log"
25 "math"
26 "os"
27 "reflect"
28 "runtime"
29 "sort"
30 "strconv"
31 "testing"
32 "testing/quick"
33 "time"
34
35 "github.com/RoaringBitmap/roaring"
36 "github.com/google/go-cmp/cmp"
37 "github.com/google/go-cmp/cmp/cmpopts"
38 "github.com/grafana/regexp"
39 "github.com/sourcegraph/zoekt/index"
40
41 "github.com/sourcegraph/zoekt"
42 "github.com/sourcegraph/zoekt/query"
43)
44
45func TestMain(m *testing.M) {
46 flag.Parse()
47 if !testing.Verbose() {
48 log.SetOutput(io.Discard)
49 }
50 os.Exit(m.Run())
51}
52
53type crashSearcher struct{}
54
55func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
56 panic("search")
57}
58
59func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
60 panic("list")
61}
62
63func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
64 return &zoekt.RepoStats{}, nil
65}
66
67func (s *crashSearcher) Close() {}
68
69func (s *crashSearcher) String() string { return "crashSearcher" }
70
71func TestCrashResilience(t *testing.T) {
72 out := &bytes.Buffer{}
73 oldOut := log.Writer()
74 log.SetOutput(out)
75 defer log.SetOutput(oldOut)
76
77 ss := newShardedSearcher(2)
78 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
79
80 var wantCrashes int
81 test := func(t *testing.T) {
82 q := &query.Substring{Pattern: "hoi"}
83 opts := &zoekt.SearchOptions{}
84 if res, err := ss.Search(context.Background(), q, opts); err != nil {
85 t.Fatalf("Search: %v", err)
86 } else if res.Stats.Crashes != wantCrashes {
87 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
88 }
89
90 if res, err := ss.List(context.Background(), q, nil); err != nil {
91 t.Fatalf("List: %v", err)
92 } else if res.Crashes != wantCrashes {
93 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
94 }
95 }
96
97 // Before we are marked as ready we have one extra crash
98 wantCrashes = 2
99 t.Run("loading", test)
100
101 // After marking as ready we should only have the crashSearcher
102 // contributing.
103 ss.markReady()
104 wantCrashes = 1
105 t.Run("ready", test)
106}
107
108type rankSearcher struct {
109 rank uint16
110 repo *zoekt.Repository
111}
112
113func (s *rankSearcher) Close() {
114}
115
116func (s *rankSearcher) String() string {
117 return ""
118}
119
120func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
121 select {
122 case <-ctx.Done():
123 return &zoekt.SearchResult{}, nil
124 default:
125 }
126
127 // Ugly, but without sleep it's too fast, and we can't
128 // simulate the cutoff.
129 time.Sleep(time.Millisecond)
130 return &zoekt.SearchResult{
131 Files: []zoekt.FileMatch{
132 {
133 FileName: fmt.Sprintf("f%d", s.rank),
134 Score: float64(s.rank),
135 },
136 },
137 Stats: zoekt.Stats{
138 MatchCount: 1,
139 },
140 }, nil
141}
142
143func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
144 r := zoekt.Repository{}
145 if s.repo != nil {
146 r = *s.repo
147 }
148 r.Rank = s.rank
149 return &zoekt.RepoList{
150 Repos: []*zoekt.RepoListEntry{
151 {Repository: r},
152 },
153 }, nil
154}
155
156func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
157
158func TestOrderByShard(t *testing.T) {
159 ss := newShardedSearcher(1)
160
161 n := 10 * runtime.GOMAXPROCS(0)
162 for i := 0; i < n; i++ {
163 ss.replace(map[string]zoekt.Searcher{
164 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
165 })
166 }
167
168 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
169 t.Errorf("Search: %v", err)
170 } else if len(res.Files) != n {
171 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
172 }
173
174 opts := zoekt.SearchOptions{
175 TotalMaxMatchCount: 3,
176 }
177 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
178 if err != nil {
179 t.Errorf("Search: %v", err)
180 }
181
182 if len(res.Files) < opts.TotalMaxMatchCount {
183 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
184 }
185 if len(res.Files) == n {
186 t.Errorf("got %d results, want < %d", len(res.Files), n)
187 }
188 for i, f := range res.Files {
189 rev := n - 1 - i
190 want := fmt.Sprintf("f%d", rev)
191 got := f.FileName
192
193 if got != want {
194 t.Logf("%d: got %q, want %q", i, got, want)
195 }
196 }
197}
198
199func TestShardedSearcher_Ranking(t *testing.T) {
200 ss := newShardedSearcher(1)
201
202 var nextShardNum int
203 addShard := func(repo string, priority float64, docs ...index.Document) {
204 r := &zoekt.Repository{ID: hash(repo), Name: repo}
205 r.RawConfig = map[string]string{
206 "public": "1",
207 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
208 }
209 b := testShardBuilder(t, r, docs...)
210 shard := searcherForTest(t, b)
211 ss.replace(map[string]zoekt.Searcher{
212 fmt.Sprintf("key-%d", nextShardNum): shard,
213 })
214 nextShardNum++
215 }
216
217 addShard("weekend-project", 20, index.Document{Name: "f2", Content: []byte("foo bas")})
218 addShard("moderately-popular", 500, index.Document{Name: "f3", Content: []byte("foo bar")})
219 addShard("weekend-project-2", 20, index.Document{Name: "f2", Content: []byte("foo bas")})
220 addShard("super-star", 5000, index.Document{Name: "f1", Content: []byte("foo bar bas")})
221
222 want := []string{
223 "super-star",
224 "moderately-popular",
225 "weekend-project",
226 "weekend-project-2",
227 }
228
229 var have []string
230 for _, s := range ss.getLoaded().shards {
231 for _, r := range s.repos {
232 have = append(have, r.Name)
233 }
234 }
235
236 if !reflect.DeepEqual(want, have) {
237 t.Fatalf("\nwant: %s\nhave: %s", want, have)
238 }
239}
240
241func TestShardedSearcher_DocumentRanking(t *testing.T) {
242 ss := newShardedSearcher(1)
243
244 var nextShardNum int
245 addShard := func(repo string, rank uint16, docs ...index.Document) {
246 r := &zoekt.Repository{ID: hash(repo), Name: repo}
247 r.RawConfig = map[string]string{
248 "public": "1",
249 }
250 r.Rank = rank
251 b := testShardBuilder(t, r, docs...)
252 shard := searcherForTest(t, b)
253 ss.replace(map[string]zoekt.Searcher{
254 fmt.Sprintf("key-%d", nextShardNum): shard,
255 })
256 nextShardNum++
257 }
258
259 addShard("old-project", 1, index.Document{Name: "f1", Content: []byte("foobar")})
260 addShard("recent", 2, index.Document{Name: "f2", Content: []byte("foobaz")})
261 addShard("old-project-2", 1, index.Document{Name: "f3", Content: []byte("foo bar")})
262 addShard("new", 3, index.Document{Name: "f4", Content: []byte("foo baz")},
263 index.Document{Name: "f5", Content: []byte("fooooo")})
264
265 // Run a stream search and gather the results
266 var results []*zoekt.SearchResult
267 opts := &zoekt.SearchOptions{
268 FlushWallTime: 100 * time.Millisecond,
269 }
270
271 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
272 zoekt.SenderFunc(func(event *zoekt.SearchResult) {
273 results = append(results, event)
274 }))
275 if err != nil {
276 t.Fatal(err)
277 }
278
279 // There should always be two stream results, first progress-only, then the file results
280 if len(results) != 2 {
281 t.Fatalf("expected 2 streamed results, but got %d", len(results))
282 }
283
284 // The ranking should be determined by whether it's an exact word match,
285 // followed by repository priority
286 want := []string{"f4", "f3", "f5", "f2", "f1"}
287
288 files := results[1].Files
289 got := make([]string, len(files))
290 for i := 0; i < len(files); i++ {
291 got[i] = files[i].FileName
292 }
293
294 if !reflect.DeepEqual(got, want) {
295 t.Errorf("got %v, want %v", got, want)
296 }
297}
298
299func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
300 ss := newShardedSearcher(1)
301
302 // namePrefix is so we can create a repo:foo filter and match the same set
303 // of repos.
304 namePrefix := [3]string{"foo", "bar", "baz"}
305
306 repoSetNames := []string{}
307 repoIDs := []uint32{}
308 n := 10 * runtime.GOMAXPROCS(0)
309 for i := 0; i < n; i++ {
310 shardName := fmt.Sprintf("shard%d", i)
311 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i)
312 repoID := hash(repoName)
313
314 if i%3 == 0 {
315 repoSetNames = append(repoSetNames, repoName)
316 repoIDs = append(repoIDs, repoID)
317 }
318
319 ss.replace(map[string]zoekt.Searcher{
320 shardName: &rankSearcher{
321 repo: &zoekt.Repository{ID: repoID, Name: repoName},
322 rank: uint16(n - i),
323 },
324 })
325 }
326
327 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
328 if err != nil {
329 t.Errorf("Search: %v", err)
330 }
331 if len(res.Files) != n {
332 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
333 }
334
335 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
336 {Branch: "HEAD", Repos: roaring.New()},
337 }}
338
339 for _, name := range repoSetNames {
340 branchesRepos.List[0].Repos.Add(hash(name))
341 }
342
343 set := query.NewRepoSet(repoSetNames...)
344 sub := &query.Substring{Pattern: "bla"}
345
346 repoIDsQuery := query.NewRepoIDs(repoIDs...)
347 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")}
348
349 queries := []query.Q{
350 query.NewAnd(set, sub),
351 // Test with the same reposet again
352 query.NewAnd(set, sub),
353
354 query.NewAnd(branchesRepos, sub),
355 // Test with the same repoBranches with IDs again
356 query.NewAnd(branchesRepos, sub),
357
358 query.NewAnd(repoIDsQuery, sub),
359 // Test with the same repoIDs again
360 query.NewAnd(repoIDsQuery, sub),
361
362 query.NewAnd(repoQuery, sub),
363 query.NewAnd(repoQuery, sub),
364
365 // List has queries which are just the reposet atoms. We also test twice.
366 set,
367 set,
368 branchesRepos,
369 branchesRepos,
370 repoIDsQuery,
371 repoIDsQuery,
372 repoQuery,
373 repoQuery,
374 }
375
376 for _, q := range queries {
377 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
378 if err != nil {
379 t.Errorf("Search(%s): %v", q, err)
380 }
381 // Note: Assertion is based on fact that `rankSearcher` always returns a
382 // result and using repoSet will half the number of results
383 if len(res.Files) != len(repoSetNames) {
384 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
385 }
386 }
387}
388
389func hash(name string) uint32 {
390 h := fnv.New32()
391 h.Write([]byte(name))
392 return h.Sum32()
393}
394
395type memSeeker struct {
396 data []byte
397}
398
399func (s *memSeeker) Name() string {
400 return "memseeker"
401}
402
403func (s *memSeeker) Close() {}
404func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
405 return s.data[off : off+sz], nil
406}
407
408func (s *memSeeker) Size() (uint32, error) {
409 return uint32(len(s.data)), nil
410}
411
412func TestUnloadIndex(t *testing.T) {
413 b := testShardBuilder(t, nil, index.Document{
414 Name: "filename",
415 Content: []byte("needle needle needle"),
416 })
417
418 var buf bytes.Buffer
419 if err := b.Write(&buf); err != nil {
420 t.Fatal(err)
421 }
422 indexBytes := buf.Bytes()
423 indexFile := &memSeeker{indexBytes}
424 searcher, err := index.NewSearcher(indexFile)
425 if err != nil {
426 t.Fatalf("NewSearcher: %v", err)
427 }
428
429 ss := newShardedSearcher(2)
430 ss.replace(map[string]zoekt.Searcher{"key": searcher})
431
432 var opts zoekt.SearchOptions
433 q := &query.Substring{Pattern: "needle"}
434 res, err := ss.Search(context.Background(), q, &opts)
435 if err != nil {
436 t.Fatalf("Search(%s): %v", q, err)
437 }
438
439 forbidden := byte(29)
440 for i := range indexBytes {
441 // non-ASCII
442 indexBytes[i] = forbidden
443 }
444
445 for _, f := range res.Files {
446 if bytes.Contains(f.Content, []byte{forbidden}) {
447 t.Errorf("found %d in content %q", forbidden, f.Content)
448 }
449 if bytes.Contains(f.Checksum, []byte{forbidden}) {
450 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
451 }
452
453 for _, l := range f.LineMatches {
454 if bytes.Contains(l.Line, []byte{forbidden}) {
455 t.Errorf("found %d in line %q", forbidden, l.Line)
456 }
457 }
458 }
459}
460
461func TestShardedSearcher_List(t *testing.T) {
462 repos := []*zoekt.Repository{
463 {
464 ID: 1234,
465 Name: "repo-a",
466 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
467 RawConfig: map[string]string{"repoid": "1234"},
468 },
469 {
470 Name: "repo-b",
471 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
472 },
473 }
474
475 doc := index.Document{
476 Name: "foo.go",
477 Content: []byte("bar\nbaz"),
478 Branches: []string{"main", "dev"},
479 }
480
481 // Test duplicate removal when ListOptions.Minimal is true and false
482 ss := newShardedSearcher(4)
483 ss.replace(map[string]zoekt.Searcher{
484 "1": searcherForTest(t, testShardBuilder(t, repos[0], doc)),
485 "2": searcherForTest(t, testShardBuilder(t, repos[0])),
486 "3": searcherForTest(t, testShardBuilder(t, repos[1], doc)),
487 "4": searcherForTest(t, testShardBuilder(t, repos[1])),
488 })
489 ss.markReady()
490
491 stats := zoekt.RepoStats{
492 Shards: 2,
493 Documents: 1,
494 IndexBytes: 196,
495 ContentBytes: 13,
496 NewLinesCount: 1,
497 DefaultBranchNewLinesCount: 1,
498 OtherBranchesNewLinesCount: 1,
499 }
500
501 aggStats := stats
502 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
503 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
504
505 for _, tc := range []struct {
506 name string
507 opts *zoekt.ListOptions
508 want *zoekt.RepoList
509 }{
510 {
511 name: "nil opts",
512 opts: nil,
513 want: &zoekt.RepoList{
514 Repos: []*zoekt.RepoListEntry{
515 {
516 Repository: *repos[0],
517 Stats: stats,
518 },
519 {
520 Repository: *repos[1],
521 Stats: stats,
522 },
523 },
524 Stats: aggStats,
525 },
526 },
527 {
528 name: "default",
529 opts: &zoekt.ListOptions{},
530 want: &zoekt.RepoList{
531 Repos: []*zoekt.RepoListEntry{
532 {
533 Repository: *repos[0],
534 Stats: stats,
535 },
536 {
537 Repository: *repos[1],
538 Stats: stats,
539 },
540 },
541 Stats: aggStats,
542 },
543 },
544 {
545 name: "field=repos",
546 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
547 want: &zoekt.RepoList{
548 Repos: []*zoekt.RepoListEntry{
549 {
550 Repository: *repos[0],
551 Stats: stats,
552 },
553 {
554 Repository: *repos[1],
555 Stats: stats,
556 },
557 },
558 Stats: aggStats,
559 },
560 },
561 {
562 name: "field=reposmap",
563 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
564 want: &zoekt.RepoList{
565 Repos: []*zoekt.RepoListEntry{
566 {
567 Repository: *repos[1],
568 Stats: stats,
569 },
570 },
571 ReposMap: zoekt.ReposMap{
572 repos[0].ID: {
573 HasSymbols: repos[0].HasSymbols,
574 Branches: repos[0].Branches,
575 },
576 },
577 Stats: aggStats,
578 },
579 },
580 } {
581 tc := tc
582 t.Run(tc.name, func(t *testing.T) {
583 t.Parallel()
584
585 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
586
587 res, err := ss.List(context.Background(), q, tc.opts)
588 if err != nil {
589 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
590 }
591
592 sort.Slice(res.Repos, func(i, j int) bool {
593 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
594 })
595
596 ignored := []cmp.Option{
597 cmpopts.EquateEmpty(),
598 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
599 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
600 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
601 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
602 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
603 }
604
605 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
606 t.Fatalf("mismatch (-want +got):\n%s", diff)
607 }
608 })
609 }
610}
611
612func testShardBuilder(t testing.TB, repo *zoekt.Repository, docs ...index.Document) *index.ShardBuilder {
613 b, err := index.NewShardBuilder(repo)
614 if err != nil {
615 t.Fatalf("NewShardBuilder: %v", err)
616 }
617
618 for i, d := range docs {
619 if err := b.Add(d); err != nil {
620 t.Fatalf("Add %d: %v", i, err)
621 }
622 }
623 return b
624}
625
626func searcherForTest(t testing.TB, b *index.ShardBuilder) zoekt.Searcher {
627 var buf bytes.Buffer
628 if err := b.Write(&buf); err != nil {
629 t.Fatal(err)
630 }
631 f := &memSeeker{buf.Bytes()}
632
633 searcher, err := index.NewSearcher(f)
634 if err != nil {
635 t.Fatalf("NewSearcher: %v", err)
636 }
637
638 return searcher
639}
640
641func reposForTest(n int) (result []*zoekt.Repository) {
642 for i := 0; i < n; i++ {
643 result = append(result, &zoekt.Repository{
644 ID: uint32(i + 1),
645 Name: fmt.Sprintf("test-repository-%d", i),
646 })
647 }
648 return result
649}
650
651func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
652 builder := testShardBuilder(b, r)
653
654 if err := builder.Add(index.Document{
655 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
656 Content: []byte("needle needle needle haystack"),
657 }); err != nil {
658 b.Fatal(err)
659 }
660
661 for i := 1; i < numFiles; i++ {
662 if err := builder.Add(index.Document{
663 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
664 Content: []byte("haystack haystack haystack"),
665 }); err != nil {
666 b.Fatal(err)
667 }
668 }
669
670 return searcherForTest(b, builder)
671}
672
673func BenchmarkShardedSearch(b *testing.B) {
674 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
675
676 filesPerRepo := 300
677 repos := reposForTest(3000)
678 var repoSetIDs []uint32
679
680 shards := make(map[string]zoekt.Searcher, len(repos))
681 for i, r := range repos {
682 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
683 if i%2 == 0 {
684 repoSetIDs = append(repoSetIDs, r.ID)
685 }
686 }
687
688 ss.replace(shards)
689
690 ctx := context.Background()
691 opts := &zoekt.SearchOptions{}
692
693 needleSub := &query.Substring{Pattern: "needle"}
694 haystackSub := &query.Substring{Pattern: "haystack"}
695 helloworldSub := &query.Substring{Pattern: "helloworld"}
696 haystackCap, err := query.Parse("hay(s(t))(a)ck")
697 if err != nil {
698 b.Fatal(err)
699 }
700
701 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
702 if err != nil {
703 b.Fatal(err)
704 }
705
706 setAnd := func(q query.Q) func() query.Q {
707 return func() query.Q {
708 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
709 }
710 }
711
712 search := func(b *testing.B, q query.Q, wantFiles int) {
713 b.Helper()
714
715 res, err := ss.Search(ctx, q, opts)
716 if err != nil {
717 b.Fatalf("Search(%s): %v", q, err)
718 }
719 if have := len(res.Files); have != wantFiles {
720 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
721 }
722 }
723
724 benchmarks := []struct {
725 name string
726 q func() query.Q
727 wantFiles int
728 }{
729 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
730 {"substring no results", func() query.Q { return helloworldSub }, 0},
731 {"substring some results", func() query.Q { return needleSub }, len(repos)},
732
733 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
734 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
735
736 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
737 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
738 {"substring no results and repo set", setAnd(helloworldSub), 0},
739 }
740
741 for _, bb := range benchmarks {
742 b.Run(bb.name, func(b *testing.B) {
743 b.ReportAllocs()
744
745 for n := 0; n < b.N; n++ {
746 q := bb.q()
747
748 search(b, q, bb.wantFiles)
749 }
750 })
751 }
752}
753
754func TestRawQuerySearch(t *testing.T) {
755 ss := newShardedSearcher(1)
756
757 var nextShardNum int
758 addShard := func(repo string, rawConfig map[string]string, docs ...index.Document) {
759 r := &zoekt.Repository{Name: repo}
760 r.RawConfig = rawConfig
761 b := testShardBuilder(t, r, docs...)
762 shard := searcherForTest(t, b)
763 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
764 nextShardNum++
765 }
766 addShard("public", map[string]string{"public": "1"}, index.Document{Name: "f1", Content: []byte("foo bar bas")})
767 addShard("private_archived", map[string]string{"archived": "1"}, index.Document{Name: "f2", Content: []byte("foo bas")})
768 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, index.Document{Name: "f3", Content: []byte("foo bar")})
769
770 cases := []struct {
771 pattern string
772 flags query.RawConfig
773 wantRepos []string
774 wantFiles int
775 }{
776 {
777 pattern: "bas",
778 flags: query.RcOnlyPublic,
779 wantRepos: []string{"public"},
780 wantFiles: 1,
781 },
782 {
783 pattern: "foo",
784 flags: query.RcOnlyPublic,
785 wantRepos: []string{"public", "public_fork"},
786 wantFiles: 2,
787 },
788 {
789 pattern: "foo",
790 flags: query.RcOnlyPublic | query.RcNoForks,
791 wantRepos: []string{"public"},
792 wantFiles: 1,
793 },
794 {
795 pattern: "bar",
796 flags: query.RcOnlyForks,
797 wantRepos: []string{"public_fork"},
798 wantFiles: 1,
799 },
800 {
801 pattern: "bas",
802 flags: query.RcNoArchived,
803 wantRepos: []string{"public"},
804 wantFiles: 1,
805 },
806 {
807 pattern: "foo",
808 flags: query.RcNoForks,
809 wantRepos: []string{"public", "private_archived"},
810 wantFiles: 2,
811 },
812 {
813 pattern: "bas",
814 flags: query.RcOnlyArchived,
815 wantRepos: []string{"private_archived"},
816 wantFiles: 1,
817 },
818 {
819 pattern: "foo",
820 flags: query.RcOnlyPrivate,
821 wantRepos: []string{"private_archived"},
822 wantFiles: 1,
823 },
824 {
825 pattern: "foo",
826 flags: query.RcOnlyPrivate | query.RcNoArchived,
827 wantRepos: []string{},
828 wantFiles: 0,
829 },
830 }
831 for _, c := range cases {
832 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
833 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
834
835 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
836 if err != nil {
837 t.Fatal(err)
838 }
839
840 if got := len(sr.Files); got != c.wantFiles {
841 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
842 }
843
844 if c.wantFiles == 0 {
845 return
846 }
847
848 gotRepos := make([]string, 0, len(sr.RepoURLs))
849 for k := range sr.RepoURLs {
850 gotRepos = append(gotRepos, k)
851 }
852 sort.Strings(gotRepos)
853 sort.Strings(c.wantRepos)
854 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
855 t.Fatalf("(-want, +got):\n%s", d)
856 }
857 })
858 }
859}
860
861func TestPrioritySlice(t *testing.T) {
862 p := &prioritySlice{}
863 for step, oper := range []struct {
864 isAppend bool
865 value float64
866 expectedMax float64
867 }{
868 {true, 1, 1},
869 {true, 3, 3},
870 {true, 2, 3},
871 {false, 1, 3},
872 {false, 3, 2},
873 {false, 2, math.Inf(-1)},
874 } {
875 if oper.isAppend {
876 p.append(oper.value)
877 } else {
878 p.remove(oper.value)
879 }
880 max := p.max()
881 if max != oper.expectedMax {
882 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
883 }
884 }
885}
886
887func TestSendByRepository(t *testing.T) {
888 wantStats := zoekt.Stats{}
889 wantStats.ShardsScanned = 1
890
891 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
892 // test.
893 f := func(n1, n2, n3 uint8) bool {
894 sr := createMockSearchResult(n1, n2, n3, wantStats)
895
896 mock := &mockSender{}
897 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
898
899 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
900 t.Logf("-want,+got\n%s", diff)
901 return false
902 }
903
904 nonZero := 0
905 for _, l := range []uint8{n1, n2, n3} {
906 if l > 0 {
907 nonZero++
908 }
909 }
910 if l := len(mock.files); l != nonZero {
911 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
912 return false
913 }
914
915 gotTotal := 0
916 for _, fs := range mock.files {
917 gotTotal += len(fs)
918 }
919 wantTotal := int(n1) + int(n2) + int(n3)
920 if gotTotal != wantTotal {
921 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
922 return false
923 }
924
925 for _, fs := range mock.files {
926 if len(fs) == 0 {
927 t.Logf("got search result with 0 file matches after split")
928 return false
929 }
930 }
931 return true
932 }
933
934 if err := quick.Check(f, nil); err != nil {
935 t.Error(err)
936 }
937}
938
939type mockSender struct {
940 stats zoekt.Stats
941 files [][]zoekt.FileMatch
942}
943
944func (s *mockSender) Send(sr *zoekt.SearchResult) {
945 s.stats.Add(sr.Stats)
946 if len(sr.Files) == 0 {
947 return
948 }
949 s.files = append(s.files, sr.Files)
950}
951
952func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
953 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
954 for i, n := range []uint8{n1, n2, n3} {
955 if n == 0 {
956 continue
957 }
958 tmp := mkSearchResult(int(n), uint32(i))
959 sr.Files = append(sr.Files, tmp.Files...)
960 for k := range tmp.RepoURLs {
961 sr.RepoURLs[k] = ""
962 }
963 }
964 sr.Stats = stats
965 return sr
966}
967
968func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
969 if n == 0 {
970 return &zoekt.SearchResult{}
971 }
972 fm := make([]zoekt.FileMatch, 0, n)
973 for i := 0; i < n; i++ {
974 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
975 }
976
977 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
978}
979
980func TestFileBasedSearch(t *testing.T) {
981 cases := []struct {
982 name string
983 testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch
984 }{
985 {"Search", testShardedSearch},
986 {"StreamSearch", testShardedStreamSearch},
987 }
988
989 c1 := []byte("I love bananas without skin")
990 // -----------0123456789012345678901234567890123456789
991 c2 := []byte("In Dutch, ananas means pineapple")
992 // -----------0123456789012345678901234567890123456789
993 b := testShardBuilder(t, nil,
994 index.Document{Name: "f1", Content: c1},
995 index.Document{Name: "f2", Content: c2},
996 )
997
998 for _, tt := range cases {
999 for _, useDocumentRanks := range []bool{false, true} {
1000 t.Run(tt.name, func(t *testing.T) {
1001 matches := tt.testShardedSearch(t, &query.Substring{
1002 CaseSensitive: false,
1003 Pattern: "ananas",
1004 }, b, useDocumentRanks)
1005
1006 if len(matches) != 2 {
1007 t.Fatalf("got %v, want 2 matches", matches)
1008 }
1009 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
1010 t.Fatalf("got %v, want matches {f1,f2}", matches)
1011 }
1012 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1013 t.Fatalf("got %#v, want offsets 10,8", matches)
1014 }
1015 })
1016 }
1017 }
1018}
1019
1020func TestWordBoundaryRanking(t *testing.T) {
1021 cases := []struct {
1022 name string
1023 testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch
1024 }{
1025 {"Search", testShardedSearch},
1026 {"StreamSearch", testShardedStreamSearch},
1027 }
1028
1029 b := testShardBuilder(t, nil,
1030 index.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1031 index.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1032 // -----------------------------------------0123456 789012 34567890
1033 index.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1034
1035 for _, tt := range cases {
1036 for _, useDocumentRanks := range []bool{false, true} {
1037 t.Run(tt.name, func(t *testing.T) {
1038 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1039
1040 if len(files) != 3 {
1041 t.Fatalf("got %#v, want 3 files", files)
1042 }
1043
1044 file0 := files[0]
1045 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1046 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1047 }
1048
1049 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1050 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1051 }
1052 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1053 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1054 }
1055 })
1056 }
1057 }
1058}
1059
1060func TestAtomCountScore(t *testing.T) {
1061 cases := []struct {
1062 name string
1063 testShardedSearch func(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch
1064 }{
1065 {"Search", testShardedSearch},
1066 {"StreamSearch", testShardedStreamSearch},
1067 }
1068
1069 b := testShardBuilder(t,
1070 &zoekt.Repository{
1071 Branches: []zoekt.RepositoryBranch{
1072 {Name: "branches", Version: "v1"},
1073 {Name: "needle", Version: "v2"},
1074 },
1075 },
1076 index.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1077 index.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1078 index.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1079
1080 for _, tt := range cases {
1081 for _, useDocumentRanks := range []bool{false, true} {
1082 t.Run(tt.name, func(t *testing.T) {
1083 files := tt.testShardedSearch(t,
1084 query.NewOr(
1085 &query.Substring{Pattern: "needle"},
1086 &query.Substring{Pattern: "needle", FileName: true},
1087 &query.Branch{Pattern: "needle"},
1088 ), b, useDocumentRanks)
1089 var got []string
1090 for _, f := range files {
1091 got = append(got, f.FileName)
1092 }
1093 want := []string{"needle-file-branch", "needle-file", "f1"}
1094 if !reflect.DeepEqual(got, want) {
1095 t.Errorf("got %v, want %v", got, want)
1096 }
1097 })
1098 }
1099 }
1100}
1101
1102func TestUseBM25Scoring(t *testing.T) {
1103 b := testShardBuilder(t,
1104 &zoekt.Repository{},
1105 index.Document{Name: "f1", Content: []byte("one two two three")},
1106 index.Document{Name: "f2", Content: []byte("one two one two")},
1107 index.Document{Name: "f3", Content: []byte("one three three three")})
1108
1109 ss := newShardedSearcher(1)
1110 searcher := searcherForTest(t, b)
1111 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1112
1113 q := query.NewOr(
1114 &query.Substring{Pattern: "one"},
1115 &query.Substring{Pattern: "three"})
1116
1117 opts := zoekt.SearchOptions{
1118 UseBM25Scoring: true,
1119 }
1120
1121 results, err := ss.Search(context.Background(), q, &opts)
1122 if err != nil {
1123 t.Fatal(err)
1124 }
1125
1126 var got []string
1127 for _, f := range results.Files {
1128 got = append(got, f.FileName)
1129 }
1130
1131 want := []string{"f3", "f1", "f2"}
1132 if !reflect.DeepEqual(got, want) {
1133 t.Errorf("got %v, want %v", got, want)
1134 }
1135}
1136
1137func testShardedStreamSearch(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1138 ss := newShardedSearcher(1)
1139 searcher := searcherForTest(t, ib)
1140 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1141
1142 var files []zoekt.FileMatch
1143 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) {
1144 files = append(files, result.Files...)
1145 })
1146
1147 opts := zoekt.SearchOptions{}
1148 if useDocumentRanks {
1149 opts.FlushWallTime = 10 * time.Millisecond
1150 }
1151 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1152 t.Fatal(err)
1153 }
1154 return files
1155}
1156
1157func testShardedSearch(t *testing.T, q query.Q, ib *index.ShardBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1158 ss := newShardedSearcher(1)
1159 searcher := searcherForTest(t, ib)
1160 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1161
1162 opts := zoekt.SearchOptions{}
1163 if useDocumentRanks {
1164 opts.FlushWallTime = 50 * time.Millisecond
1165 }
1166 sres, _ := ss.Search(context.Background(), q, &opts)
1167 return sres.Files
1168}
1169
1170// Ensure we work on empty shard directories.
1171func TestNewDirectorySearcher_empty(t *testing.T) {
1172 ctx := context.Background()
1173
1174 test := func(t *testing.T, ss zoekt.Streamer) {
1175 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1176 if err != nil {
1177 t.Fatal("Search non-nil error", err)
1178 }
1179
1180 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1181 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1182 }
1183
1184 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1185 if err != nil {
1186 t.Fatal("List non-nil error", err)
1187 }
1188 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1189 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1190 }
1191 }
1192
1193 dir := t.TempDir()
1194 t.Run("blocking", func(t *testing.T) {
1195 ss, err := NewDirectorySearcher(dir)
1196 if err != nil {
1197 t.Fatal(err)
1198 }
1199 t.Cleanup(ss.Close)
1200
1201 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1202 // so we can validate straight away.
1203 test(t, ss)
1204 })
1205
1206 t.Run("fast", func(t *testing.T) {
1207 ss, err := NewDirectorySearcherFast(dir)
1208 if err != nil {
1209 t.Fatal(err)
1210 }
1211 t.Cleanup(ss.Close)
1212
1213 deadline := testDeadline(t, 10*time.Second)
1214
1215 // Wait for scanning of directory to be done. We should be returning
1216 // non-zero crashes until then.
1217 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1218 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1219 if err != nil {
1220 t.Fatal(err)
1221 }
1222 return res.Stats.Crashes == 0
1223 })
1224
1225 test(t, ss)
1226 })
1227}
1228
1229// testDeadline returns the deadline for t, but ensures it is no longer than
1230// maxTimeout away.
1231func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1232 deadline := time.Now().Add(maxTimeout)
1233 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1234 // give 1s for us to do a final test run
1235 deadline = d.Add(-time.Second)
1236 }
1237 return deadline
1238}
1239
1240func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1241 for time.Now().Before(deadline) {
1242 if pred() {
1243 return true
1244 }
1245
1246 time.Sleep(tick)
1247 }
1248
1249 return pred()
1250}