fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "hash/fnv"
22 "log"
23 "math"
24 "os"
25 "reflect"
26 "runtime"
27 "sort"
28 "strconv"
29 "testing"
30 "testing/quick"
31 "time"
32
33 "github.com/RoaringBitmap/roaring"
34 "github.com/google/go-cmp/cmp"
35 "github.com/google/go-cmp/cmp/cmpopts"
36 "github.com/grafana/regexp"
37
38 "github.com/sourcegraph/zoekt"
39 "github.com/sourcegraph/zoekt/query"
40)
41
42type crashSearcher struct{}
43
44func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
45 panic("search")
46}
47
48func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
49 panic("list")
50}
51
52func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
53 return &zoekt.RepoStats{}, nil
54}
55
56func (s *crashSearcher) Close() {}
57
58func (s *crashSearcher) String() string { return "crashSearcher" }
59
60func TestCrashResilience(t *testing.T) {
61 out := &bytes.Buffer{}
62 log.SetOutput(out)
63 defer log.SetOutput(os.Stderr)
64 ss := newShardedSearcher(2)
65 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
66
67 var wantCrashes int
68 test := func(t *testing.T) {
69 q := &query.Substring{Pattern: "hoi"}
70 opts := &zoekt.SearchOptions{}
71 if res, err := ss.Search(context.Background(), q, opts); err != nil {
72 t.Fatalf("Search: %v", err)
73 } else if res.Stats.Crashes != wantCrashes {
74 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
75 }
76
77 if res, err := ss.List(context.Background(), q, nil); err != nil {
78 t.Fatalf("List: %v", err)
79 } else if res.Crashes != wantCrashes {
80 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
81 }
82 }
83
84 // Before we are marked as ready we have one extra crash
85 wantCrashes = 2
86 t.Run("loading", test)
87
88 // After marking as ready we should only have the crashSearcher
89 // contributing.
90 ss.markReady()
91 wantCrashes = 1
92 t.Run("ready", test)
93}
94
95type rankSearcher struct {
96 rank uint16
97 repo *zoekt.Repository
98}
99
100func (s *rankSearcher) Close() {
101}
102
103func (s *rankSearcher) String() string {
104 return ""
105}
106
107func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
108 select {
109 case <-ctx.Done():
110 return &zoekt.SearchResult{}, nil
111 default:
112 }
113
114 // Ugly, but without sleep it's too fast, and we can't
115 // simulate the cutoff.
116 time.Sleep(time.Millisecond)
117 return &zoekt.SearchResult{
118 Files: []zoekt.FileMatch{
119 {
120 FileName: fmt.Sprintf("f%d", s.rank),
121 Score: float64(s.rank),
122 },
123 },
124 Stats: zoekt.Stats{
125 MatchCount: 1,
126 },
127 }, nil
128}
129
130func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
131 r := zoekt.Repository{}
132 if s.repo != nil {
133 r = *s.repo
134 }
135 r.Rank = s.rank
136 return &zoekt.RepoList{
137 Repos: []*zoekt.RepoListEntry{
138 {Repository: r},
139 },
140 }, nil
141}
142
143func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
144
145func TestOrderByShard(t *testing.T) {
146 ss := newShardedSearcher(1)
147
148 n := 10 * runtime.GOMAXPROCS(0)
149 for i := 0; i < n; i++ {
150 ss.replace(map[string]zoekt.Searcher{
151 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
152 })
153 }
154
155 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
156 t.Errorf("Search: %v", err)
157 } else if len(res.Files) != n {
158 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
159 }
160
161 opts := zoekt.SearchOptions{
162 TotalMaxMatchCount: 3,
163 }
164 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
165 if err != nil {
166 t.Errorf("Search: %v", err)
167 }
168
169 if len(res.Files) < opts.TotalMaxMatchCount {
170 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
171 }
172 if len(res.Files) == n {
173 t.Errorf("got %d results, want < %d", len(res.Files), n)
174 }
175 for i, f := range res.Files {
176 rev := n - 1 - i
177 want := fmt.Sprintf("f%d", rev)
178 got := f.FileName
179
180 if got != want {
181 t.Logf("%d: got %q, want %q", i, got, want)
182 }
183 }
184}
185
186func TestShardedSearcher_Ranking(t *testing.T) {
187 ss := newShardedSearcher(1)
188
189 var nextShardNum int
190 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
191 r := &zoekt.Repository{ID: hash(repo), Name: repo}
192 r.RawConfig = map[string]string{
193 "public": "1",
194 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
195 }
196 b := testIndexBuilder(t, r, docs...)
197 shard := searcherForTest(t, b)
198 ss.replace(map[string]zoekt.Searcher{
199 fmt.Sprintf("key-%d", nextShardNum): shard,
200 })
201 nextShardNum++
202 }
203
204 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
205 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
206 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
207 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
208
209 want := []string{
210 "super-star",
211 "moderately-popular",
212 "weekend-project",
213 "weekend-project-2",
214 }
215
216 var have []string
217 for _, s := range ss.getLoaded().shards {
218 for _, r := range s.repos {
219 have = append(have, r.Name)
220 }
221 }
222
223 if !reflect.DeepEqual(want, have) {
224 t.Fatalf("\nwant: %s\nhave: %s", want, have)
225 }
226}
227
228func TestShardedSearcher_DocumentRanking(t *testing.T) {
229 ss := newShardedSearcher(1)
230
231 var nextShardNum int
232 addShard := func(repo string, rank uint16, docs ...zoekt.Document) {
233 r := &zoekt.Repository{ID: hash(repo), Name: repo}
234 r.RawConfig = map[string]string{
235 "public": "1",
236 }
237 r.Rank = rank
238 b := testIndexBuilder(t, r, docs...)
239 shard := searcherForTest(t, b)
240 ss.replace(map[string]zoekt.Searcher{
241 fmt.Sprintf("key-%d", nextShardNum): shard,
242 })
243 nextShardNum++
244 }
245
246 addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")})
247 addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")})
248 addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
249 addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")},
250 zoekt.Document{Name: "f5", Content: []byte("fooooo")})
251
252 // Run a stream search and gather the results
253 var results []*zoekt.SearchResult
254 opts := &zoekt.SearchOptions{
255 FlushWallTime: 100 * time.Millisecond,
256 }
257
258 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
259 zoekt.SenderFunc(func(event *zoekt.SearchResult) {
260 results = append(results, event)
261 }))
262 if err != nil {
263 t.Fatal(err)
264 }
265
266 // There should always be two stream results, first progress-only, then the file results
267 if len(results) != 2 {
268 t.Fatalf("expected 2 streamed results, but got %d", len(results))
269 }
270
271 // The ranking should be determined by whether it's an exact word match,
272 // followed by repository priority
273 want := []string{"f4", "f3", "f5", "f2", "f1"}
274
275 files := results[1].Files
276 got := make([]string, len(files))
277 for i := 0; i < len(files); i++ {
278 got[i] = files[i].FileName
279 }
280
281 if !reflect.DeepEqual(got, want) {
282 t.Errorf("got %v, want %v", got, want)
283 }
284}
285
286func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
287 ss := newShardedSearcher(1)
288
289 // namePrefix is so we can create a repo:foo filter and match the same set
290 // of repos.
291 namePrefix := [3]string{"foo", "bar", "baz"}
292
293 repoSetNames := []string{}
294 repoIDs := []uint32{}
295 n := 10 * runtime.GOMAXPROCS(0)
296 for i := 0; i < n; i++ {
297 shardName := fmt.Sprintf("shard%d", i)
298 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i)
299 repoID := hash(repoName)
300
301 if i%3 == 0 {
302 repoSetNames = append(repoSetNames, repoName)
303 repoIDs = append(repoIDs, repoID)
304 }
305
306 ss.replace(map[string]zoekt.Searcher{
307 shardName: &rankSearcher{
308 repo: &zoekt.Repository{ID: repoID, Name: repoName},
309 rank: uint16(n - i),
310 },
311 })
312 }
313
314 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
315 if err != nil {
316 t.Errorf("Search: %v", err)
317 }
318 if len(res.Files) != n {
319 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
320 }
321
322 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
323 {Branch: "HEAD", Repos: roaring.New()},
324 }}
325
326 for _, name := range repoSetNames {
327 branchesRepos.List[0].Repos.Add(hash(name))
328 }
329
330 set := query.NewRepoSet(repoSetNames...)
331 sub := &query.Substring{Pattern: "bla"}
332
333 repoIDsQuery := query.NewRepoIDs(repoIDs...)
334 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")}
335
336 queries := []query.Q{
337 query.NewAnd(set, sub),
338 // Test with the same reposet again
339 query.NewAnd(set, sub),
340
341 query.NewAnd(branchesRepos, sub),
342 // Test with the same repoBranches with IDs again
343 query.NewAnd(branchesRepos, sub),
344
345 query.NewAnd(repoIDsQuery, sub),
346 // Test with the same repoIDs again
347 query.NewAnd(repoIDsQuery, sub),
348
349 query.NewAnd(repoQuery, sub),
350 query.NewAnd(repoQuery, sub),
351
352 // List has queries which are just the reposet atoms. We also test twice.
353 set,
354 set,
355 branchesRepos,
356 branchesRepos,
357 repoIDsQuery,
358 repoIDsQuery,
359 repoQuery,
360 repoQuery,
361 }
362
363 for _, q := range queries {
364 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
365 if err != nil {
366 t.Errorf("Search(%s): %v", q, err)
367 }
368 // Note: Assertion is based on fact that `rankSearcher` always returns a
369 // result and using repoSet will half the number of results
370 if len(res.Files) != len(repoSetNames) {
371 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
372 }
373 }
374}
375
376func hash(name string) uint32 {
377 h := fnv.New32()
378 h.Write([]byte(name))
379 return h.Sum32()
380}
381
382type memSeeker struct {
383 data []byte
384}
385
386func (s *memSeeker) Name() string {
387 return "memseeker"
388}
389
390func (s *memSeeker) Close() {}
391func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
392 return s.data[off : off+sz], nil
393}
394
395func (s *memSeeker) Size() (uint32, error) {
396 return uint32(len(s.data)), nil
397}
398
399func TestUnloadIndex(t *testing.T) {
400 b := testIndexBuilder(t, nil, zoekt.Document{
401 Name: "filename",
402 Content: []byte("needle needle needle"),
403 })
404
405 var buf bytes.Buffer
406 if err := b.Write(&buf); err != nil {
407 t.Fatal(err)
408 }
409 indexBytes := buf.Bytes()
410 indexFile := &memSeeker{indexBytes}
411 searcher, err := zoekt.NewSearcher(indexFile)
412 if err != nil {
413 t.Fatalf("NewSearcher: %v", err)
414 }
415
416 ss := newShardedSearcher(2)
417 ss.replace(map[string]zoekt.Searcher{"key": searcher})
418
419 var opts zoekt.SearchOptions
420 q := &query.Substring{Pattern: "needle"}
421 res, err := ss.Search(context.Background(), q, &opts)
422 if err != nil {
423 t.Fatalf("Search(%s): %v", q, err)
424 }
425
426 forbidden := byte(29)
427 for i := range indexBytes {
428 // non-ASCII
429 indexBytes[i] = forbidden
430 }
431
432 for _, f := range res.Files {
433 if bytes.Contains(f.Content, []byte{forbidden}) {
434 t.Errorf("found %d in content %q", forbidden, f.Content)
435 }
436 if bytes.Contains(f.Checksum, []byte{forbidden}) {
437 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
438 }
439
440 for _, l := range f.LineMatches {
441 if bytes.Contains(l.Line, []byte{forbidden}) {
442 t.Errorf("found %d in line %q", forbidden, l.Line)
443 }
444 }
445 }
446}
447
448func TestShardedSearcher_List(t *testing.T) {
449 repos := []*zoekt.Repository{
450 {
451 ID: 1234,
452 Name: "repo-a",
453 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
454 RawConfig: map[string]string{"repoid": "1234"},
455 },
456 {
457 Name: "repo-b",
458 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
459 },
460 }
461
462 doc := zoekt.Document{
463 Name: "foo.go",
464 Content: []byte("bar\nbaz"),
465 Branches: []string{"main", "dev"},
466 }
467
468 // Test duplicate removal when ListOptions.Minimal is true and false
469 ss := newShardedSearcher(4)
470 ss.replace(map[string]zoekt.Searcher{
471 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)),
472 "2": searcherForTest(t, testIndexBuilder(t, repos[0])),
473 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)),
474 "4": searcherForTest(t, testIndexBuilder(t, repos[1])),
475 })
476 ss.markReady()
477
478 stats := zoekt.RepoStats{
479 Shards: 2,
480 Documents: 1,
481 IndexBytes: 196,
482 ContentBytes: 13,
483 NewLinesCount: 1,
484 DefaultBranchNewLinesCount: 1,
485 OtherBranchesNewLinesCount: 1,
486 }
487
488 aggStats := stats
489 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
490 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
491
492 for _, tc := range []struct {
493 name string
494 opts *zoekt.ListOptions
495 want *zoekt.RepoList
496 }{
497 {
498 name: "nil opts",
499 opts: nil,
500 want: &zoekt.RepoList{
501 Repos: []*zoekt.RepoListEntry{
502 {
503 Repository: *repos[0],
504 Stats: stats,
505 },
506 {
507 Repository: *repos[1],
508 Stats: stats,
509 },
510 },
511 Stats: aggStats,
512 },
513 },
514 {
515 name: "default",
516 opts: &zoekt.ListOptions{},
517 want: &zoekt.RepoList{
518 Repos: []*zoekt.RepoListEntry{
519 {
520 Repository: *repos[0],
521 Stats: stats,
522 },
523 {
524 Repository: *repos[1],
525 Stats: stats,
526 },
527 },
528 Stats: aggStats,
529 },
530 },
531 {
532 name: "field=repos",
533 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
534 want: &zoekt.RepoList{
535 Repos: []*zoekt.RepoListEntry{
536 {
537 Repository: *repos[0],
538 Stats: stats,
539 },
540 {
541 Repository: *repos[1],
542 Stats: stats,
543 },
544 },
545 Stats: aggStats,
546 },
547 },
548 {
549 name: "field=reposmap",
550 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
551 want: &zoekt.RepoList{
552 Repos: []*zoekt.RepoListEntry{
553 {
554 Repository: *repos[1],
555 Stats: stats,
556 },
557 },
558 ReposMap: zoekt.ReposMap{
559 repos[0].ID: {
560 HasSymbols: repos[0].HasSymbols,
561 Branches: repos[0].Branches,
562 },
563 },
564 Stats: aggStats,
565 },
566 },
567 } {
568 tc := tc
569 t.Run(tc.name, func(t *testing.T) {
570 t.Parallel()
571
572 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
573
574 res, err := ss.List(context.Background(), q, tc.opts)
575 if err != nil {
576 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
577 }
578
579 sort.Slice(res.Repos, func(i, j int) bool {
580 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
581 })
582
583 ignored := []cmp.Option{
584 cmpopts.EquateEmpty(),
585 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
586 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
587 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
588 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
589 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
590 }
591
592 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
593 t.Fatalf("mismatch (-want +got):\n%s", diff)
594 }
595 })
596 }
597}
598
599func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder {
600 b, err := zoekt.NewIndexBuilder(repo)
601 if err != nil {
602 t.Fatalf("NewIndexBuilder: %v", err)
603 }
604
605 for i, d := range docs {
606 if err := b.Add(d); err != nil {
607 t.Fatalf("Add %d: %v", i, err)
608 }
609 }
610 return b
611}
612
613func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher {
614 var buf bytes.Buffer
615 if err := b.Write(&buf); err != nil {
616 t.Fatal(err)
617 }
618 f := &memSeeker{buf.Bytes()}
619
620 searcher, err := zoekt.NewSearcher(f)
621 if err != nil {
622 t.Fatalf("NewSearcher: %v", err)
623 }
624
625 return searcher
626}
627
628func reposForTest(n int) (result []*zoekt.Repository) {
629 for i := 0; i < n; i++ {
630 result = append(result, &zoekt.Repository{
631 ID: uint32(i + 1),
632 Name: fmt.Sprintf("test-repository-%d", i),
633 })
634 }
635 return result
636}
637
638func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
639 builder := testIndexBuilder(b, r)
640
641 if err := builder.Add(zoekt.Document{
642 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
643 Content: []byte("needle needle needle haystack"),
644 }); err != nil {
645 b.Fatal(err)
646 }
647
648 for i := 1; i < numFiles; i++ {
649 if err := builder.Add(zoekt.Document{
650 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
651 Content: []byte("haystack haystack haystack"),
652 }); err != nil {
653 b.Fatal(err)
654 }
655 }
656
657 return searcherForTest(b, builder)
658}
659
660func BenchmarkShardedSearch(b *testing.B) {
661 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
662
663 filesPerRepo := 300
664 repos := reposForTest(3000)
665 var repoSetIDs []uint32
666
667 shards := make(map[string]zoekt.Searcher, len(repos))
668 for i, r := range repos {
669 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
670 if i%2 == 0 {
671 repoSetIDs = append(repoSetIDs, r.ID)
672 }
673 }
674
675 ss.replace(shards)
676
677 ctx := context.Background()
678 opts := &zoekt.SearchOptions{}
679
680 needleSub := &query.Substring{Pattern: "needle"}
681 haystackSub := &query.Substring{Pattern: "haystack"}
682 helloworldSub := &query.Substring{Pattern: "helloworld"}
683 haystackCap, err := query.Parse("hay(s(t))(a)ck")
684 if err != nil {
685 b.Fatal(err)
686 }
687
688 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
689 if err != nil {
690 b.Fatal(err)
691 }
692
693 setAnd := func(q query.Q) func() query.Q {
694 return func() query.Q {
695 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
696 }
697 }
698
699 search := func(b *testing.B, q query.Q, wantFiles int) {
700 b.Helper()
701
702 res, err := ss.Search(ctx, q, opts)
703 if err != nil {
704 b.Fatalf("Search(%s): %v", q, err)
705 }
706 if have := len(res.Files); have != wantFiles {
707 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
708 }
709 }
710
711 benchmarks := []struct {
712 name string
713 q func() query.Q
714 wantFiles int
715 }{
716 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
717 {"substring no results", func() query.Q { return helloworldSub }, 0},
718 {"substring some results", func() query.Q { return needleSub }, len(repos)},
719
720 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
721 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
722
723 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
724 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
725 {"substring no results and repo set", setAnd(helloworldSub), 0},
726 }
727
728 for _, bb := range benchmarks {
729 b.Run(bb.name, func(b *testing.B) {
730 b.ReportAllocs()
731
732 for n := 0; n < b.N; n++ {
733 q := bb.q()
734
735 search(b, q, bb.wantFiles)
736 }
737 })
738 }
739}
740
741func TestRawQuerySearch(t *testing.T) {
742 ss := newShardedSearcher(1)
743
744 var nextShardNum int
745 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) {
746 r := &zoekt.Repository{Name: repo}
747 r.RawConfig = rawConfig
748 b := testIndexBuilder(t, r, docs...)
749 shard := searcherForTest(t, b)
750 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
751 nextShardNum++
752 }
753 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
754 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
755 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
756
757 cases := []struct {
758 pattern string
759 flags query.RawConfig
760 wantRepos []string
761 wantFiles int
762 }{
763 {
764 pattern: "bas",
765 flags: query.RcOnlyPublic,
766 wantRepos: []string{"public"},
767 wantFiles: 1,
768 },
769 {
770 pattern: "foo",
771 flags: query.RcOnlyPublic,
772 wantRepos: []string{"public", "public_fork"},
773 wantFiles: 2,
774 },
775 {
776 pattern: "foo",
777 flags: query.RcOnlyPublic | query.RcNoForks,
778 wantRepos: []string{"public"},
779 wantFiles: 1,
780 },
781 {
782 pattern: "bar",
783 flags: query.RcOnlyForks,
784 wantRepos: []string{"public_fork"},
785 wantFiles: 1,
786 },
787 {
788 pattern: "bas",
789 flags: query.RcNoArchived,
790 wantRepos: []string{"public"},
791 wantFiles: 1,
792 },
793 {
794 pattern: "foo",
795 flags: query.RcNoForks,
796 wantRepos: []string{"public", "private_archived"},
797 wantFiles: 2,
798 },
799 {
800 pattern: "bas",
801 flags: query.RcOnlyArchived,
802 wantRepos: []string{"private_archived"},
803 wantFiles: 1,
804 },
805 {
806 pattern: "foo",
807 flags: query.RcOnlyPrivate,
808 wantRepos: []string{"private_archived"},
809 wantFiles: 1,
810 },
811 {
812 pattern: "foo",
813 flags: query.RcOnlyPrivate | query.RcNoArchived,
814 wantRepos: []string{},
815 wantFiles: 0,
816 },
817 }
818 for _, c := range cases {
819 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
820 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
821
822 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
823 if err != nil {
824 t.Fatal(err)
825 }
826
827 if got := len(sr.Files); got != c.wantFiles {
828 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
829 }
830
831 if c.wantFiles == 0 {
832 return
833 }
834
835 gotRepos := make([]string, 0, len(sr.RepoURLs))
836 for k := range sr.RepoURLs {
837 gotRepos = append(gotRepos, k)
838 }
839 sort.Strings(gotRepos)
840 sort.Strings(c.wantRepos)
841 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
842 t.Fatalf("(-want, +got):\n%s", d)
843 }
844 })
845 }
846}
847
848func TestPrioritySlice(t *testing.T) {
849 p := &prioritySlice{}
850 for step, oper := range []struct {
851 isAppend bool
852 value float64
853 expectedMax float64
854 }{
855 {true, 1, 1},
856 {true, 3, 3},
857 {true, 2, 3},
858 {false, 1, 3},
859 {false, 3, 2},
860 {false, 2, math.Inf(-1)},
861 } {
862 if oper.isAppend {
863 p.append(oper.value)
864 } else {
865 p.remove(oper.value)
866 }
867 max := p.max()
868 if max != oper.expectedMax {
869 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
870 }
871 }
872}
873
874func TestSendByRepository(t *testing.T) {
875 wantStats := zoekt.Stats{}
876 wantStats.ShardsScanned = 1
877
878 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
879 // test.
880 f := func(n1, n2, n3 uint8) bool {
881 sr := createMockSearchResult(n1, n2, n3, wantStats)
882
883 mock := &mockSender{}
884 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
885
886 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
887 t.Logf("-want,+got\n%s", diff)
888 return false
889 }
890
891 nonZero := 0
892 for _, l := range []uint8{n1, n2, n3} {
893 if l > 0 {
894 nonZero++
895 }
896 }
897 if l := len(mock.files); l != nonZero {
898 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
899 return false
900 }
901
902 gotTotal := 0
903 for _, fs := range mock.files {
904 gotTotal += len(fs)
905 }
906 wantTotal := int(n1) + int(n2) + int(n3)
907 if gotTotal != wantTotal {
908 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
909 return false
910 }
911
912 for _, fs := range mock.files {
913 if len(fs) == 0 {
914 t.Logf("got search result with 0 file matches after split")
915 return false
916 }
917 }
918 return true
919 }
920
921 if err := quick.Check(f, nil); err != nil {
922 t.Error(err)
923 }
924}
925
926type mockSender struct {
927 stats zoekt.Stats
928 files [][]zoekt.FileMatch
929}
930
931func (s *mockSender) Send(sr *zoekt.SearchResult) {
932 s.stats.Add(sr.Stats)
933 if len(sr.Files) == 0 {
934 return
935 }
936 s.files = append(s.files, sr.Files)
937}
938
939func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
940 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
941 for i, n := range []uint8{n1, n2, n3} {
942 if n == 0 {
943 continue
944 }
945 tmp := mkSearchResult(int(n), uint32(i))
946 sr.Files = append(sr.Files, tmp.Files...)
947 for k := range tmp.RepoURLs {
948 sr.RepoURLs[k] = ""
949 }
950 }
951 sr.Stats = stats
952 return sr
953}
954
955func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
956 if n == 0 {
957 return &zoekt.SearchResult{}
958 }
959 fm := make([]zoekt.FileMatch, 0, n)
960 for i := 0; i < n; i++ {
961 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
962 }
963
964 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
965}
966
967func TestFileBasedSearch(t *testing.T) {
968 cases := []struct {
969 name string
970 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
971 }{
972 {"Search", testShardedSearch},
973 {"StreamSearch", testShardedStreamSearch},
974 }
975
976 c1 := []byte("I love bananas without skin")
977 // -----------0123456789012345678901234567890123456789
978 c2 := []byte("In Dutch, ananas means pineapple")
979 // -----------0123456789012345678901234567890123456789
980 b := testIndexBuilder(t, nil,
981 zoekt.Document{Name: "f1", Content: c1},
982 zoekt.Document{Name: "f2", Content: c2},
983 )
984
985 for _, tt := range cases {
986 for _, useDocumentRanks := range []bool{false, true} {
987 t.Run(tt.name, func(t *testing.T) {
988 matches := tt.testShardedSearch(t, &query.Substring{
989 CaseSensitive: false,
990 Pattern: "ananas",
991 }, b, useDocumentRanks)
992
993 if len(matches) != 2 {
994 t.Fatalf("got %v, want 2 matches", matches)
995 }
996 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
997 t.Fatalf("got %v, want matches {f1,f2}", matches)
998 }
999 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1000 t.Fatalf("got %#v, want offsets 10,8", matches)
1001 }
1002 })
1003 }
1004 }
1005}
1006
1007func TestWordBoundaryRanking(t *testing.T) {
1008 cases := []struct {
1009 name string
1010 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1011 }{
1012 {"Search", testShardedSearch},
1013 {"StreamSearch", testShardedStreamSearch},
1014 }
1015
1016 b := testIndexBuilder(t, nil,
1017 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1018 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1019 // -----------------------------------------0123456 789012 34567890
1020 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1021
1022 for _, tt := range cases {
1023 for _, useDocumentRanks := range []bool{false, true} {
1024 t.Run(tt.name, func(t *testing.T) {
1025 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1026
1027 if len(files) != 3 {
1028 t.Fatalf("got %#v, want 3 files", files)
1029 }
1030
1031 file0 := files[0]
1032 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1033 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1034 }
1035
1036 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1037 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1038 }
1039 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1040 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1041 }
1042 })
1043 }
1044 }
1045}
1046
1047func TestAtomCountScore(t *testing.T) {
1048 cases := []struct {
1049 name string
1050 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1051 }{
1052 {"Search", testShardedSearch},
1053 {"StreamSearch", testShardedStreamSearch},
1054 }
1055
1056 b := testIndexBuilder(t,
1057 &zoekt.Repository{
1058 Branches: []zoekt.RepositoryBranch{
1059 {Name: "branches", Version: "v1"},
1060 {Name: "needle", Version: "v2"},
1061 },
1062 },
1063 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1064 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1065 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1066
1067 for _, tt := range cases {
1068 for _, useDocumentRanks := range []bool{false, true} {
1069 t.Run(tt.name, func(t *testing.T) {
1070 files := tt.testShardedSearch(t,
1071 query.NewOr(
1072 &query.Substring{Pattern: "needle"},
1073 &query.Substring{Pattern: "needle", FileName: true},
1074 &query.Branch{Pattern: "needle"},
1075 ), b, useDocumentRanks)
1076 var got []string
1077 for _, f := range files {
1078 got = append(got, f.FileName)
1079 }
1080 want := []string{"needle-file-branch", "needle-file", "f1"}
1081 if !reflect.DeepEqual(got, want) {
1082 t.Errorf("got %v, want %v", got, want)
1083 }
1084 })
1085 }
1086 }
1087}
1088
1089func TestUseBM25Scoring(t *testing.T) {
1090 b := testIndexBuilder(t,
1091 &zoekt.Repository{},
1092 zoekt.Document{Name: "f1", Content: []byte("one two two three")},
1093 zoekt.Document{Name: "f2", Content: []byte("one two one two")},
1094 zoekt.Document{Name: "f3", Content: []byte("one three three three")})
1095
1096 ss := newShardedSearcher(1)
1097 searcher := searcherForTest(t, b)
1098 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1099
1100 q := query.NewOr(
1101 &query.Substring{Pattern: "one"},
1102 &query.Substring{Pattern: "three"})
1103
1104 opts := zoekt.SearchOptions{
1105 UseBM25Scoring: true,
1106 }
1107
1108 results, err := ss.Search(context.Background(), q, &opts)
1109 if err != nil {
1110 t.Fatal(err)
1111 }
1112
1113 var got []string
1114 for _, f := range results.Files {
1115 got = append(got, f.FileName)
1116 }
1117
1118 want := []string{"f3", "f1", "f2"}
1119 if !reflect.DeepEqual(got, want) {
1120 t.Errorf("got %v, want %v", got, want)
1121 }
1122}
1123
1124func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1125 ss := newShardedSearcher(1)
1126 searcher := searcherForTest(t, ib)
1127 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1128
1129 var files []zoekt.FileMatch
1130 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) {
1131 files = append(files, result.Files...)
1132 })
1133
1134 opts := zoekt.SearchOptions{}
1135 if useDocumentRanks {
1136 opts.FlushWallTime = 10 * time.Millisecond
1137 }
1138 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1139 t.Fatal(err)
1140 }
1141 return files
1142}
1143
1144func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1145 ss := newShardedSearcher(1)
1146 searcher := searcherForTest(t, ib)
1147 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1148
1149 opts := zoekt.SearchOptions{}
1150 if useDocumentRanks {
1151 opts.FlushWallTime = 50 * time.Millisecond
1152 }
1153 sres, _ := ss.Search(context.Background(), q, &opts)
1154 return sres.Files
1155}
1156
1157// Ensure we work on empty shard directories.
1158func TestNewDirectorySearcher_empty(t *testing.T) {
1159 ctx := context.Background()
1160
1161 test := func(t *testing.T, ss zoekt.Streamer) {
1162 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1163 if err != nil {
1164 t.Fatal("Search non-nil error", err)
1165 }
1166
1167 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1168 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1169 }
1170
1171 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1172 if err != nil {
1173 t.Fatal("List non-nil error", err)
1174 }
1175 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1176 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1177 }
1178 }
1179
1180 dir := t.TempDir()
1181 t.Run("blocking", func(t *testing.T) {
1182 ss, err := NewDirectorySearcher(dir)
1183 if err != nil {
1184 t.Fatal(err)
1185 }
1186 t.Cleanup(ss.Close)
1187
1188 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1189 // so we can validate straight away.
1190 test(t, ss)
1191 })
1192
1193 t.Run("fast", func(t *testing.T) {
1194 ss, err := NewDirectorySearcherFast(dir)
1195 if err != nil {
1196 t.Fatal(err)
1197 }
1198 t.Cleanup(ss.Close)
1199
1200 deadline := testDeadline(t, 10*time.Second)
1201
1202 // Wait for scanning of directory to be done. We should be returning
1203 // non-zero crashes until then.
1204 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1205 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1206 if err != nil {
1207 t.Fatal(err)
1208 }
1209 return res.Stats.Crashes == 0
1210 })
1211
1212 test(t, ss)
1213 })
1214}
1215
1216// testDeadline returns the deadline for t, but ensures it is no longer than
1217// maxTimeout away.
1218func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1219 deadline := time.Now().Add(maxTimeout)
1220 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1221 // give 1s for us to do a final test run
1222 deadline = d.Add(-time.Second)
1223 }
1224 return deadline
1225}
1226
1227func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1228 for time.Now().Before(deadline) {
1229 if pred() {
1230 return true
1231 }
1232
1233 time.Sleep(tick)
1234 }
1235
1236 return pred()
1237}