fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "flag"
21 "fmt"
22 "hash/fnv"
23 "io"
24 "log"
25 "math"
26 "os"
27 "reflect"
28 "runtime"
29 "sort"
30 "strconv"
31 "testing"
32 "testing/quick"
33 "time"
34
35 "github.com/RoaringBitmap/roaring"
36 "github.com/google/go-cmp/cmp"
37 "github.com/google/go-cmp/cmp/cmpopts"
38 "github.com/grafana/regexp"
39
40 "github.com/sourcegraph/zoekt"
41 "github.com/sourcegraph/zoekt/query"
42)
43
44func TestMain(m *testing.M) {
45 flag.Parse()
46 if !testing.Verbose() {
47 log.SetOutput(io.Discard)
48 }
49 os.Exit(m.Run())
50}
51
52type crashSearcher struct{}
53
54func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
55 panic("search")
56}
57
58func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
59 panic("list")
60}
61
62func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
63 return &zoekt.RepoStats{}, nil
64}
65
66func (s *crashSearcher) Close() {}
67
68func (s *crashSearcher) String() string { return "crashSearcher" }
69
70func TestCrashResilience(t *testing.T) {
71 out := &bytes.Buffer{}
72 oldOut := log.Writer()
73 log.SetOutput(out)
74 defer log.SetOutput(oldOut)
75
76 ss := newShardedSearcher(2)
77 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
78
79 var wantCrashes int
80 test := func(t *testing.T) {
81 q := &query.Substring{Pattern: "hoi"}
82 opts := &zoekt.SearchOptions{}
83 if res, err := ss.Search(context.Background(), q, opts); err != nil {
84 t.Fatalf("Search: %v", err)
85 } else if res.Stats.Crashes != wantCrashes {
86 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
87 }
88
89 if res, err := ss.List(context.Background(), q, nil); err != nil {
90 t.Fatalf("List: %v", err)
91 } else if res.Crashes != wantCrashes {
92 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
93 }
94 }
95
96 // Before we are marked as ready we have one extra crash
97 wantCrashes = 2
98 t.Run("loading", test)
99
100 // After marking as ready we should only have the crashSearcher
101 // contributing.
102 ss.markReady()
103 wantCrashes = 1
104 t.Run("ready", test)
105}
106
107type rankSearcher struct {
108 rank uint16
109 repo *zoekt.Repository
110}
111
112func (s *rankSearcher) Close() {
113}
114
115func (s *rankSearcher) String() string {
116 return ""
117}
118
119func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
120 select {
121 case <-ctx.Done():
122 return &zoekt.SearchResult{}, nil
123 default:
124 }
125
126 // Ugly, but without sleep it's too fast, and we can't
127 // simulate the cutoff.
128 time.Sleep(time.Millisecond)
129 return &zoekt.SearchResult{
130 Files: []zoekt.FileMatch{
131 {
132 FileName: fmt.Sprintf("f%d", s.rank),
133 Score: float64(s.rank),
134 },
135 },
136 Stats: zoekt.Stats{
137 MatchCount: 1,
138 },
139 }, nil
140}
141
142func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
143 r := zoekt.Repository{}
144 if s.repo != nil {
145 r = *s.repo
146 }
147 r.Rank = s.rank
148 return &zoekt.RepoList{
149 Repos: []*zoekt.RepoListEntry{
150 {Repository: r},
151 },
152 }, nil
153}
154
155func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
156
157func TestOrderByShard(t *testing.T) {
158 ss := newShardedSearcher(1)
159
160 n := 10 * runtime.GOMAXPROCS(0)
161 for i := 0; i < n; i++ {
162 ss.replace(map[string]zoekt.Searcher{
163 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
164 })
165 }
166
167 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
168 t.Errorf("Search: %v", err)
169 } else if len(res.Files) != n {
170 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
171 }
172
173 opts := zoekt.SearchOptions{
174 TotalMaxMatchCount: 3,
175 }
176 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
177 if err != nil {
178 t.Errorf("Search: %v", err)
179 }
180
181 if len(res.Files) < opts.TotalMaxMatchCount {
182 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
183 }
184 if len(res.Files) == n {
185 t.Errorf("got %d results, want < %d", len(res.Files), n)
186 }
187 for i, f := range res.Files {
188 rev := n - 1 - i
189 want := fmt.Sprintf("f%d", rev)
190 got := f.FileName
191
192 if got != want {
193 t.Logf("%d: got %q, want %q", i, got, want)
194 }
195 }
196}
197
198func TestShardedSearcher_Ranking(t *testing.T) {
199 ss := newShardedSearcher(1)
200
201 var nextShardNum int
202 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
203 r := &zoekt.Repository{ID: hash(repo), Name: repo}
204 r.RawConfig = map[string]string{
205 "public": "1",
206 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
207 }
208 b := testIndexBuilder(t, r, docs...)
209 shard := searcherForTest(t, b)
210 ss.replace(map[string]zoekt.Searcher{
211 fmt.Sprintf("key-%d", nextShardNum): shard,
212 })
213 nextShardNum++
214 }
215
216 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
217 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
218 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
219 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
220
221 want := []string{
222 "super-star",
223 "moderately-popular",
224 "weekend-project",
225 "weekend-project-2",
226 }
227
228 var have []string
229 for _, s := range ss.getLoaded().shards {
230 for _, r := range s.repos {
231 have = append(have, r.Name)
232 }
233 }
234
235 if !reflect.DeepEqual(want, have) {
236 t.Fatalf("\nwant: %s\nhave: %s", want, have)
237 }
238}
239
240func TestShardedSearcher_DocumentRanking(t *testing.T) {
241 ss := newShardedSearcher(1)
242
243 var nextShardNum int
244 addShard := func(repo string, rank uint16, docs ...zoekt.Document) {
245 r := &zoekt.Repository{ID: hash(repo), Name: repo}
246 r.RawConfig = map[string]string{
247 "public": "1",
248 }
249 r.Rank = rank
250 b := testIndexBuilder(t, r, docs...)
251 shard := searcherForTest(t, b)
252 ss.replace(map[string]zoekt.Searcher{
253 fmt.Sprintf("key-%d", nextShardNum): shard,
254 })
255 nextShardNum++
256 }
257
258 addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")})
259 addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")})
260 addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
261 addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")},
262 zoekt.Document{Name: "f5", Content: []byte("fooooo")})
263
264 // Run a stream search and gather the results
265 var results []*zoekt.SearchResult
266 opts := &zoekt.SearchOptions{
267 FlushWallTime: 100 * time.Millisecond,
268 }
269
270 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
271 zoekt.SenderFunc(func(event *zoekt.SearchResult) {
272 results = append(results, event)
273 }))
274 if err != nil {
275 t.Fatal(err)
276 }
277
278 // There should always be two stream results, first progress-only, then the file results
279 if len(results) != 2 {
280 t.Fatalf("expected 2 streamed results, but got %d", len(results))
281 }
282
283 // The ranking should be determined by whether it's an exact word match,
284 // followed by repository priority
285 want := []string{"f4", "f3", "f5", "f2", "f1"}
286
287 files := results[1].Files
288 got := make([]string, len(files))
289 for i := 0; i < len(files); i++ {
290 got[i] = files[i].FileName
291 }
292
293 if !reflect.DeepEqual(got, want) {
294 t.Errorf("got %v, want %v", got, want)
295 }
296}
297
298func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
299 ss := newShardedSearcher(1)
300
301 // namePrefix is so we can create a repo:foo filter and match the same set
302 // of repos.
303 namePrefix := [3]string{"foo", "bar", "baz"}
304
305 repoSetNames := []string{}
306 repoIDs := []uint32{}
307 n := 10 * runtime.GOMAXPROCS(0)
308 for i := 0; i < n; i++ {
309 shardName := fmt.Sprintf("shard%d", i)
310 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i)
311 repoID := hash(repoName)
312
313 if i%3 == 0 {
314 repoSetNames = append(repoSetNames, repoName)
315 repoIDs = append(repoIDs, repoID)
316 }
317
318 ss.replace(map[string]zoekt.Searcher{
319 shardName: &rankSearcher{
320 repo: &zoekt.Repository{ID: repoID, Name: repoName},
321 rank: uint16(n - i),
322 },
323 })
324 }
325
326 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
327 if err != nil {
328 t.Errorf("Search: %v", err)
329 }
330 if len(res.Files) != n {
331 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
332 }
333
334 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
335 {Branch: "HEAD", Repos: roaring.New()},
336 }}
337
338 for _, name := range repoSetNames {
339 branchesRepos.List[0].Repos.Add(hash(name))
340 }
341
342 set := query.NewRepoSet(repoSetNames...)
343 sub := &query.Substring{Pattern: "bla"}
344
345 repoIDsQuery := query.NewRepoIDs(repoIDs...)
346 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")}
347
348 queries := []query.Q{
349 query.NewAnd(set, sub),
350 // Test with the same reposet again
351 query.NewAnd(set, sub),
352
353 query.NewAnd(branchesRepos, sub),
354 // Test with the same repoBranches with IDs again
355 query.NewAnd(branchesRepos, sub),
356
357 query.NewAnd(repoIDsQuery, sub),
358 // Test with the same repoIDs again
359 query.NewAnd(repoIDsQuery, sub),
360
361 query.NewAnd(repoQuery, sub),
362 query.NewAnd(repoQuery, sub),
363
364 // List has queries which are just the reposet atoms. We also test twice.
365 set,
366 set,
367 branchesRepos,
368 branchesRepos,
369 repoIDsQuery,
370 repoIDsQuery,
371 repoQuery,
372 repoQuery,
373 }
374
375 for _, q := range queries {
376 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
377 if err != nil {
378 t.Errorf("Search(%s): %v", q, err)
379 }
380 // Note: Assertion is based on fact that `rankSearcher` always returns a
381 // result and using repoSet will half the number of results
382 if len(res.Files) != len(repoSetNames) {
383 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
384 }
385 }
386}
387
388func hash(name string) uint32 {
389 h := fnv.New32()
390 h.Write([]byte(name))
391 return h.Sum32()
392}
393
394type memSeeker struct {
395 data []byte
396}
397
398func (s *memSeeker) Name() string {
399 return "memseeker"
400}
401
402func (s *memSeeker) Close() {}
403func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
404 return s.data[off : off+sz], nil
405}
406
407func (s *memSeeker) Size() (uint32, error) {
408 return uint32(len(s.data)), nil
409}
410
411func TestUnloadIndex(t *testing.T) {
412 b := testIndexBuilder(t, nil, zoekt.Document{
413 Name: "filename",
414 Content: []byte("needle needle needle"),
415 })
416
417 var buf bytes.Buffer
418 if err := b.Write(&buf); err != nil {
419 t.Fatal(err)
420 }
421 indexBytes := buf.Bytes()
422 indexFile := &memSeeker{indexBytes}
423 searcher, err := zoekt.NewSearcher(indexFile)
424 if err != nil {
425 t.Fatalf("NewSearcher: %v", err)
426 }
427
428 ss := newShardedSearcher(2)
429 ss.replace(map[string]zoekt.Searcher{"key": searcher})
430
431 var opts zoekt.SearchOptions
432 q := &query.Substring{Pattern: "needle"}
433 res, err := ss.Search(context.Background(), q, &opts)
434 if err != nil {
435 t.Fatalf("Search(%s): %v", q, err)
436 }
437
438 forbidden := byte(29)
439 for i := range indexBytes {
440 // non-ASCII
441 indexBytes[i] = forbidden
442 }
443
444 for _, f := range res.Files {
445 if bytes.Contains(f.Content, []byte{forbidden}) {
446 t.Errorf("found %d in content %q", forbidden, f.Content)
447 }
448 if bytes.Contains(f.Checksum, []byte{forbidden}) {
449 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
450 }
451
452 for _, l := range f.LineMatches {
453 if bytes.Contains(l.Line, []byte{forbidden}) {
454 t.Errorf("found %d in line %q", forbidden, l.Line)
455 }
456 }
457 }
458}
459
460func TestShardedSearcher_List(t *testing.T) {
461 repos := []*zoekt.Repository{
462 {
463 ID: 1234,
464 Name: "repo-a",
465 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
466 RawConfig: map[string]string{"repoid": "1234"},
467 },
468 {
469 Name: "repo-b",
470 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
471 },
472 }
473
474 doc := zoekt.Document{
475 Name: "foo.go",
476 Content: []byte("bar\nbaz"),
477 Branches: []string{"main", "dev"},
478 }
479
480 // Test duplicate removal when ListOptions.Minimal is true and false
481 ss := newShardedSearcher(4)
482 ss.replace(map[string]zoekt.Searcher{
483 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)),
484 "2": searcherForTest(t, testIndexBuilder(t, repos[0])),
485 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)),
486 "4": searcherForTest(t, testIndexBuilder(t, repos[1])),
487 })
488 ss.markReady()
489
490 stats := zoekt.RepoStats{
491 Shards: 2,
492 Documents: 1,
493 IndexBytes: 196,
494 ContentBytes: 13,
495 NewLinesCount: 1,
496 DefaultBranchNewLinesCount: 1,
497 OtherBranchesNewLinesCount: 1,
498 }
499
500 aggStats := stats
501 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
502 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
503
504 for _, tc := range []struct {
505 name string
506 opts *zoekt.ListOptions
507 want *zoekt.RepoList
508 }{
509 {
510 name: "nil opts",
511 opts: nil,
512 want: &zoekt.RepoList{
513 Repos: []*zoekt.RepoListEntry{
514 {
515 Repository: *repos[0],
516 Stats: stats,
517 },
518 {
519 Repository: *repos[1],
520 Stats: stats,
521 },
522 },
523 Stats: aggStats,
524 },
525 },
526 {
527 name: "default",
528 opts: &zoekt.ListOptions{},
529 want: &zoekt.RepoList{
530 Repos: []*zoekt.RepoListEntry{
531 {
532 Repository: *repos[0],
533 Stats: stats,
534 },
535 {
536 Repository: *repos[1],
537 Stats: stats,
538 },
539 },
540 Stats: aggStats,
541 },
542 },
543 {
544 name: "field=repos",
545 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
546 want: &zoekt.RepoList{
547 Repos: []*zoekt.RepoListEntry{
548 {
549 Repository: *repos[0],
550 Stats: stats,
551 },
552 {
553 Repository: *repos[1],
554 Stats: stats,
555 },
556 },
557 Stats: aggStats,
558 },
559 },
560 {
561 name: "field=reposmap",
562 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
563 want: &zoekt.RepoList{
564 Repos: []*zoekt.RepoListEntry{
565 {
566 Repository: *repos[1],
567 Stats: stats,
568 },
569 },
570 ReposMap: zoekt.ReposMap{
571 repos[0].ID: {
572 HasSymbols: repos[0].HasSymbols,
573 Branches: repos[0].Branches,
574 },
575 },
576 Stats: aggStats,
577 },
578 },
579 } {
580 tc := tc
581 t.Run(tc.name, func(t *testing.T) {
582 t.Parallel()
583
584 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
585
586 res, err := ss.List(context.Background(), q, tc.opts)
587 if err != nil {
588 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
589 }
590
591 sort.Slice(res.Repos, func(i, j int) bool {
592 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
593 })
594
595 ignored := []cmp.Option{
596 cmpopts.EquateEmpty(),
597 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
598 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
599 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
600 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
601 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
602 }
603
604 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
605 t.Fatalf("mismatch (-want +got):\n%s", diff)
606 }
607 })
608 }
609}
610
611func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder {
612 b, err := zoekt.NewIndexBuilder(repo)
613 if err != nil {
614 t.Fatalf("NewIndexBuilder: %v", err)
615 }
616
617 for i, d := range docs {
618 if err := b.Add(d); err != nil {
619 t.Fatalf("Add %d: %v", i, err)
620 }
621 }
622 return b
623}
624
625func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher {
626 var buf bytes.Buffer
627 if err := b.Write(&buf); err != nil {
628 t.Fatal(err)
629 }
630 f := &memSeeker{buf.Bytes()}
631
632 searcher, err := zoekt.NewSearcher(f)
633 if err != nil {
634 t.Fatalf("NewSearcher: %v", err)
635 }
636
637 return searcher
638}
639
640func reposForTest(n int) (result []*zoekt.Repository) {
641 for i := 0; i < n; i++ {
642 result = append(result, &zoekt.Repository{
643 ID: uint32(i + 1),
644 Name: fmt.Sprintf("test-repository-%d", i),
645 })
646 }
647 return result
648}
649
650func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
651 builder := testIndexBuilder(b, r)
652
653 if err := builder.Add(zoekt.Document{
654 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
655 Content: []byte("needle needle needle haystack"),
656 }); err != nil {
657 b.Fatal(err)
658 }
659
660 for i := 1; i < numFiles; i++ {
661 if err := builder.Add(zoekt.Document{
662 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
663 Content: []byte("haystack haystack haystack"),
664 }); err != nil {
665 b.Fatal(err)
666 }
667 }
668
669 return searcherForTest(b, builder)
670}
671
672func BenchmarkShardedSearch(b *testing.B) {
673 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
674
675 filesPerRepo := 300
676 repos := reposForTest(3000)
677 var repoSetIDs []uint32
678
679 shards := make(map[string]zoekt.Searcher, len(repos))
680 for i, r := range repos {
681 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
682 if i%2 == 0 {
683 repoSetIDs = append(repoSetIDs, r.ID)
684 }
685 }
686
687 ss.replace(shards)
688
689 ctx := context.Background()
690 opts := &zoekt.SearchOptions{}
691
692 needleSub := &query.Substring{Pattern: "needle"}
693 haystackSub := &query.Substring{Pattern: "haystack"}
694 helloworldSub := &query.Substring{Pattern: "helloworld"}
695 haystackCap, err := query.Parse("hay(s(t))(a)ck")
696 if err != nil {
697 b.Fatal(err)
698 }
699
700 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
701 if err != nil {
702 b.Fatal(err)
703 }
704
705 setAnd := func(q query.Q) func() query.Q {
706 return func() query.Q {
707 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
708 }
709 }
710
711 search := func(b *testing.B, q query.Q, wantFiles int) {
712 b.Helper()
713
714 res, err := ss.Search(ctx, q, opts)
715 if err != nil {
716 b.Fatalf("Search(%s): %v", q, err)
717 }
718 if have := len(res.Files); have != wantFiles {
719 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
720 }
721 }
722
723 benchmarks := []struct {
724 name string
725 q func() query.Q
726 wantFiles int
727 }{
728 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
729 {"substring no results", func() query.Q { return helloworldSub }, 0},
730 {"substring some results", func() query.Q { return needleSub }, len(repos)},
731
732 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
733 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
734
735 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
736 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
737 {"substring no results and repo set", setAnd(helloworldSub), 0},
738 }
739
740 for _, bb := range benchmarks {
741 b.Run(bb.name, func(b *testing.B) {
742 b.ReportAllocs()
743
744 for n := 0; n < b.N; n++ {
745 q := bb.q()
746
747 search(b, q, bb.wantFiles)
748 }
749 })
750 }
751}
752
753func TestRawQuerySearch(t *testing.T) {
754 ss := newShardedSearcher(1)
755
756 var nextShardNum int
757 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) {
758 r := &zoekt.Repository{Name: repo}
759 r.RawConfig = rawConfig
760 b := testIndexBuilder(t, r, docs...)
761 shard := searcherForTest(t, b)
762 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
763 nextShardNum++
764 }
765 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
766 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
767 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
768
769 cases := []struct {
770 pattern string
771 flags query.RawConfig
772 wantRepos []string
773 wantFiles int
774 }{
775 {
776 pattern: "bas",
777 flags: query.RcOnlyPublic,
778 wantRepos: []string{"public"},
779 wantFiles: 1,
780 },
781 {
782 pattern: "foo",
783 flags: query.RcOnlyPublic,
784 wantRepos: []string{"public", "public_fork"},
785 wantFiles: 2,
786 },
787 {
788 pattern: "foo",
789 flags: query.RcOnlyPublic | query.RcNoForks,
790 wantRepos: []string{"public"},
791 wantFiles: 1,
792 },
793 {
794 pattern: "bar",
795 flags: query.RcOnlyForks,
796 wantRepos: []string{"public_fork"},
797 wantFiles: 1,
798 },
799 {
800 pattern: "bas",
801 flags: query.RcNoArchived,
802 wantRepos: []string{"public"},
803 wantFiles: 1,
804 },
805 {
806 pattern: "foo",
807 flags: query.RcNoForks,
808 wantRepos: []string{"public", "private_archived"},
809 wantFiles: 2,
810 },
811 {
812 pattern: "bas",
813 flags: query.RcOnlyArchived,
814 wantRepos: []string{"private_archived"},
815 wantFiles: 1,
816 },
817 {
818 pattern: "foo",
819 flags: query.RcOnlyPrivate,
820 wantRepos: []string{"private_archived"},
821 wantFiles: 1,
822 },
823 {
824 pattern: "foo",
825 flags: query.RcOnlyPrivate | query.RcNoArchived,
826 wantRepos: []string{},
827 wantFiles: 0,
828 },
829 }
830 for _, c := range cases {
831 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
832 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
833
834 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
835 if err != nil {
836 t.Fatal(err)
837 }
838
839 if got := len(sr.Files); got != c.wantFiles {
840 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
841 }
842
843 if c.wantFiles == 0 {
844 return
845 }
846
847 gotRepos := make([]string, 0, len(sr.RepoURLs))
848 for k := range sr.RepoURLs {
849 gotRepos = append(gotRepos, k)
850 }
851 sort.Strings(gotRepos)
852 sort.Strings(c.wantRepos)
853 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
854 t.Fatalf("(-want, +got):\n%s", d)
855 }
856 })
857 }
858}
859
860func TestPrioritySlice(t *testing.T) {
861 p := &prioritySlice{}
862 for step, oper := range []struct {
863 isAppend bool
864 value float64
865 expectedMax float64
866 }{
867 {true, 1, 1},
868 {true, 3, 3},
869 {true, 2, 3},
870 {false, 1, 3},
871 {false, 3, 2},
872 {false, 2, math.Inf(-1)},
873 } {
874 if oper.isAppend {
875 p.append(oper.value)
876 } else {
877 p.remove(oper.value)
878 }
879 max := p.max()
880 if max != oper.expectedMax {
881 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
882 }
883 }
884}
885
886func TestSendByRepository(t *testing.T) {
887 wantStats := zoekt.Stats{}
888 wantStats.ShardsScanned = 1
889
890 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
891 // test.
892 f := func(n1, n2, n3 uint8) bool {
893 sr := createMockSearchResult(n1, n2, n3, wantStats)
894
895 mock := &mockSender{}
896 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
897
898 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
899 t.Logf("-want,+got\n%s", diff)
900 return false
901 }
902
903 nonZero := 0
904 for _, l := range []uint8{n1, n2, n3} {
905 if l > 0 {
906 nonZero++
907 }
908 }
909 if l := len(mock.files); l != nonZero {
910 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
911 return false
912 }
913
914 gotTotal := 0
915 for _, fs := range mock.files {
916 gotTotal += len(fs)
917 }
918 wantTotal := int(n1) + int(n2) + int(n3)
919 if gotTotal != wantTotal {
920 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
921 return false
922 }
923
924 for _, fs := range mock.files {
925 if len(fs) == 0 {
926 t.Logf("got search result with 0 file matches after split")
927 return false
928 }
929 }
930 return true
931 }
932
933 if err := quick.Check(f, nil); err != nil {
934 t.Error(err)
935 }
936}
937
938type mockSender struct {
939 stats zoekt.Stats
940 files [][]zoekt.FileMatch
941}
942
943func (s *mockSender) Send(sr *zoekt.SearchResult) {
944 s.stats.Add(sr.Stats)
945 if len(sr.Files) == 0 {
946 return
947 }
948 s.files = append(s.files, sr.Files)
949}
950
951func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
952 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
953 for i, n := range []uint8{n1, n2, n3} {
954 if n == 0 {
955 continue
956 }
957 tmp := mkSearchResult(int(n), uint32(i))
958 sr.Files = append(sr.Files, tmp.Files...)
959 for k := range tmp.RepoURLs {
960 sr.RepoURLs[k] = ""
961 }
962 }
963 sr.Stats = stats
964 return sr
965}
966
967func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
968 if n == 0 {
969 return &zoekt.SearchResult{}
970 }
971 fm := make([]zoekt.FileMatch, 0, n)
972 for i := 0; i < n; i++ {
973 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
974 }
975
976 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
977}
978
979func TestFileBasedSearch(t *testing.T) {
980 cases := []struct {
981 name string
982 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
983 }{
984 {"Search", testShardedSearch},
985 {"StreamSearch", testShardedStreamSearch},
986 }
987
988 c1 := []byte("I love bananas without skin")
989 // -----------0123456789012345678901234567890123456789
990 c2 := []byte("In Dutch, ananas means pineapple")
991 // -----------0123456789012345678901234567890123456789
992 b := testIndexBuilder(t, nil,
993 zoekt.Document{Name: "f1", Content: c1},
994 zoekt.Document{Name: "f2", Content: c2},
995 )
996
997 for _, tt := range cases {
998 for _, useDocumentRanks := range []bool{false, true} {
999 t.Run(tt.name, func(t *testing.T) {
1000 matches := tt.testShardedSearch(t, &query.Substring{
1001 CaseSensitive: false,
1002 Pattern: "ananas",
1003 }, b, useDocumentRanks)
1004
1005 if len(matches) != 2 {
1006 t.Fatalf("got %v, want 2 matches", matches)
1007 }
1008 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
1009 t.Fatalf("got %v, want matches {f1,f2}", matches)
1010 }
1011 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1012 t.Fatalf("got %#v, want offsets 10,8", matches)
1013 }
1014 })
1015 }
1016 }
1017}
1018
1019func TestWordBoundaryRanking(t *testing.T) {
1020 cases := []struct {
1021 name string
1022 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1023 }{
1024 {"Search", testShardedSearch},
1025 {"StreamSearch", testShardedStreamSearch},
1026 }
1027
1028 b := testIndexBuilder(t, nil,
1029 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1030 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1031 // -----------------------------------------0123456 789012 34567890
1032 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1033
1034 for _, tt := range cases {
1035 for _, useDocumentRanks := range []bool{false, true} {
1036 t.Run(tt.name, func(t *testing.T) {
1037 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1038
1039 if len(files) != 3 {
1040 t.Fatalf("got %#v, want 3 files", files)
1041 }
1042
1043 file0 := files[0]
1044 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1045 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1046 }
1047
1048 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1049 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1050 }
1051 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1052 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1053 }
1054 })
1055 }
1056 }
1057}
1058
1059func TestAtomCountScore(t *testing.T) {
1060 cases := []struct {
1061 name string
1062 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1063 }{
1064 {"Search", testShardedSearch},
1065 {"StreamSearch", testShardedStreamSearch},
1066 }
1067
1068 b := testIndexBuilder(t,
1069 &zoekt.Repository{
1070 Branches: []zoekt.RepositoryBranch{
1071 {Name: "branches", Version: "v1"},
1072 {Name: "needle", Version: "v2"},
1073 },
1074 },
1075 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1076 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1077 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1078
1079 for _, tt := range cases {
1080 for _, useDocumentRanks := range []bool{false, true} {
1081 t.Run(tt.name, func(t *testing.T) {
1082 files := tt.testShardedSearch(t,
1083 query.NewOr(
1084 &query.Substring{Pattern: "needle"},
1085 &query.Substring{Pattern: "needle", FileName: true},
1086 &query.Branch{Pattern: "needle"},
1087 ), b, useDocumentRanks)
1088 var got []string
1089 for _, f := range files {
1090 got = append(got, f.FileName)
1091 }
1092 want := []string{"needle-file-branch", "needle-file", "f1"}
1093 if !reflect.DeepEqual(got, want) {
1094 t.Errorf("got %v, want %v", got, want)
1095 }
1096 })
1097 }
1098 }
1099}
1100
1101func TestUseBM25Scoring(t *testing.T) {
1102 b := testIndexBuilder(t,
1103 &zoekt.Repository{},
1104 zoekt.Document{Name: "f1", Content: []byte("one two two three")},
1105 zoekt.Document{Name: "f2", Content: []byte("one two one two")},
1106 zoekt.Document{Name: "f3", Content: []byte("one three three three")})
1107
1108 ss := newShardedSearcher(1)
1109 searcher := searcherForTest(t, b)
1110 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1111
1112 q := query.NewOr(
1113 &query.Substring{Pattern: "one"},
1114 &query.Substring{Pattern: "three"})
1115
1116 opts := zoekt.SearchOptions{
1117 UseBM25Scoring: true,
1118 }
1119
1120 results, err := ss.Search(context.Background(), q, &opts)
1121 if err != nil {
1122 t.Fatal(err)
1123 }
1124
1125 var got []string
1126 for _, f := range results.Files {
1127 got = append(got, f.FileName)
1128 }
1129
1130 want := []string{"f3", "f1", "f2"}
1131 if !reflect.DeepEqual(got, want) {
1132 t.Errorf("got %v, want %v", got, want)
1133 }
1134}
1135
1136func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1137 ss := newShardedSearcher(1)
1138 searcher := searcherForTest(t, ib)
1139 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1140
1141 var files []zoekt.FileMatch
1142 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) {
1143 files = append(files, result.Files...)
1144 })
1145
1146 opts := zoekt.SearchOptions{}
1147 if useDocumentRanks {
1148 opts.FlushWallTime = 10 * time.Millisecond
1149 }
1150 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1151 t.Fatal(err)
1152 }
1153 return files
1154}
1155
1156func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1157 ss := newShardedSearcher(1)
1158 searcher := searcherForTest(t, ib)
1159 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1160
1161 opts := zoekt.SearchOptions{}
1162 if useDocumentRanks {
1163 opts.FlushWallTime = 50 * time.Millisecond
1164 }
1165 sres, _ := ss.Search(context.Background(), q, &opts)
1166 return sres.Files
1167}
1168
1169// Ensure we work on empty shard directories.
1170func TestNewDirectorySearcher_empty(t *testing.T) {
1171 ctx := context.Background()
1172
1173 test := func(t *testing.T, ss zoekt.Streamer) {
1174 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1175 if err != nil {
1176 t.Fatal("Search non-nil error", err)
1177 }
1178
1179 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1180 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1181 }
1182
1183 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1184 if err != nil {
1185 t.Fatal("List non-nil error", err)
1186 }
1187 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1188 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1189 }
1190 }
1191
1192 dir := t.TempDir()
1193 t.Run("blocking", func(t *testing.T) {
1194 ss, err := NewDirectorySearcher(dir)
1195 if err != nil {
1196 t.Fatal(err)
1197 }
1198 t.Cleanup(ss.Close)
1199
1200 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1201 // so we can validate straight away.
1202 test(t, ss)
1203 })
1204
1205 t.Run("fast", func(t *testing.T) {
1206 ss, err := NewDirectorySearcherFast(dir)
1207 if err != nil {
1208 t.Fatal(err)
1209 }
1210 t.Cleanup(ss.Close)
1211
1212 deadline := testDeadline(t, 10*time.Second)
1213
1214 // Wait for scanning of directory to be done. We should be returning
1215 // non-zero crashes until then.
1216 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1217 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1218 if err != nil {
1219 t.Fatal(err)
1220 }
1221 return res.Stats.Crashes == 0
1222 })
1223
1224 test(t, ss)
1225 })
1226}
1227
1228// testDeadline returns the deadline for t, but ensures it is no longer than
1229// maxTimeout away.
1230func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1231 deadline := time.Now().Add(maxTimeout)
1232 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1233 // give 1s for us to do a final test run
1234 deadline = d.Add(-time.Second)
1235 }
1236 return deadline
1237}
1238
1239func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1240 for time.Now().Before(deadline) {
1241 if pred() {
1242 return true
1243 }
1244
1245 time.Sleep(tick)
1246 }
1247
1248 return pred()
1249}