fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "hash/fnv"
22 "log"
23 "math"
24 "os"
25 "reflect"
26 "runtime"
27 "sort"
28 "strconv"
29 "testing"
30 "testing/quick"
31 "time"
32
33 "github.com/RoaringBitmap/roaring"
34 "github.com/google/go-cmp/cmp"
35 "github.com/google/go-cmp/cmp/cmpopts"
36 "github.com/grafana/regexp"
37
38 "github.com/sourcegraph/zoekt"
39 "github.com/sourcegraph/zoekt/query"
40 "github.com/sourcegraph/zoekt/stream"
41)
42
43type crashSearcher struct{}
44
45func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
46 panic("search")
47}
48
49func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
50 panic("list")
51}
52
53func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
54 return &zoekt.RepoStats{}, nil
55}
56
57func (s *crashSearcher) Close() {}
58
59func (s *crashSearcher) String() string { return "crashSearcher" }
60
61func TestCrashResilience(t *testing.T) {
62 out := &bytes.Buffer{}
63 log.SetOutput(out)
64 defer log.SetOutput(os.Stderr)
65 ss := newShardedSearcher(2)
66 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
67
68 var wantCrashes int
69 test := func(t *testing.T) {
70 q := &query.Substring{Pattern: "hoi"}
71 opts := &zoekt.SearchOptions{}
72 if res, err := ss.Search(context.Background(), q, opts); err != nil {
73 t.Fatalf("Search: %v", err)
74 } else if res.Stats.Crashes != wantCrashes {
75 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
76 }
77
78 if res, err := ss.List(context.Background(), q, nil); err != nil {
79 t.Fatalf("List: %v", err)
80 } else if res.Crashes != wantCrashes {
81 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
82 }
83 }
84
85 // Before we are marked as ready we have one extra crash
86 wantCrashes = 2
87 t.Run("loading", test)
88
89 // After marking as ready we should only have the crashSearcher
90 // contributing.
91 ss.markReady()
92 wantCrashes = 1
93 t.Run("ready", test)
94}
95
96type rankSearcher struct {
97 rank uint16
98 repo *zoekt.Repository
99}
100
101func (s *rankSearcher) Close() {
102}
103
104func (s *rankSearcher) String() string {
105 return ""
106}
107
108func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
109 select {
110 case <-ctx.Done():
111 return &zoekt.SearchResult{}, nil
112 default:
113 }
114
115 // Ugly, but without sleep it's too fast, and we can't
116 // simulate the cutoff.
117 time.Sleep(time.Millisecond)
118 return &zoekt.SearchResult{
119 Files: []zoekt.FileMatch{
120 {
121 FileName: fmt.Sprintf("f%d", s.rank),
122 Score: float64(s.rank),
123 },
124 },
125 Stats: zoekt.Stats{
126 MatchCount: 1,
127 },
128 }, nil
129}
130
131func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
132 r := zoekt.Repository{}
133 if s.repo != nil {
134 r = *s.repo
135 }
136 r.Rank = s.rank
137 return &zoekt.RepoList{
138 Repos: []*zoekt.RepoListEntry{
139 {Repository: r},
140 },
141 }, nil
142}
143
144func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
145
146func TestOrderByShard(t *testing.T) {
147 ss := newShardedSearcher(1)
148
149 n := 10 * runtime.GOMAXPROCS(0)
150 for i := 0; i < n; i++ {
151 ss.replace(map[string]zoekt.Searcher{
152 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
153 })
154 }
155
156 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
157 t.Errorf("Search: %v", err)
158 } else if len(res.Files) != n {
159 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
160 }
161
162 opts := zoekt.SearchOptions{
163 TotalMaxMatchCount: 3,
164 }
165 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
166 if err != nil {
167 t.Errorf("Search: %v", err)
168 }
169
170 if len(res.Files) < opts.TotalMaxMatchCount {
171 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
172 }
173 if len(res.Files) == n {
174 t.Errorf("got %d results, want < %d", len(res.Files), n)
175 }
176 for i, f := range res.Files {
177 rev := n - 1 - i
178 want := fmt.Sprintf("f%d", rev)
179 got := f.FileName
180
181 if got != want {
182 t.Logf("%d: got %q, want %q", i, got, want)
183 }
184 }
185}
186
187func TestShardedSearcher_Ranking(t *testing.T) {
188 ss := newShardedSearcher(1)
189
190 var nextShardNum int
191 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
192 r := &zoekt.Repository{ID: hash(repo), Name: repo}
193 r.RawConfig = map[string]string{
194 "public": "1",
195 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
196 }
197 b := testIndexBuilder(t, r, docs...)
198 shard := searcherForTest(t, b)
199 ss.replace(map[string]zoekt.Searcher{
200 fmt.Sprintf("key-%d", nextShardNum): shard,
201 })
202 nextShardNum++
203 }
204
205 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
206 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
207 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
208 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
209
210 want := []string{
211 "super-star",
212 "moderately-popular",
213 "weekend-project",
214 "weekend-project-2",
215 }
216
217 var have []string
218 for _, s := range ss.getLoaded().shards {
219 for _, r := range s.repos {
220 have = append(have, r.Name)
221 }
222 }
223
224 if !reflect.DeepEqual(want, have) {
225 t.Fatalf("\nwant: %s\nhave: %s", want, have)
226 }
227}
228
229func TestShardedSearcher_DocumentRanking(t *testing.T) {
230 ss := newShardedSearcher(1)
231
232 var nextShardNum int
233 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
234 r := &zoekt.Repository{ID: hash(repo), Name: repo}
235 r.RawConfig = map[string]string{
236 "public": "1",
237 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
238 }
239 b := testIndexBuilder(t, r, docs...)
240 shard := searcherForTest(t, b)
241 ss.replace(map[string]zoekt.Searcher{
242 fmt.Sprintf("key-%d", nextShardNum): shard,
243 })
244 nextShardNum++
245 }
246
247 addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")})
248 addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")})
249 addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
250 addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")},
251 zoekt.Document{Name: "f5", Content: []byte("fooooo")})
252
253 // Run a stream search and gather the results
254 var results []*zoekt.SearchResult
255 opts := &zoekt.SearchOptions{
256 UseDocumentRanks: true,
257 FlushWallTime: 100 * time.Millisecond,
258 }
259
260 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
261 stream.SenderFunc(func(event *zoekt.SearchResult) {
262 results = append(results, event)
263 }))
264
265 if err != nil {
266 t.Fatal(err)
267 }
268
269 // There should always be two stream results, first progress-only, then the file results
270 if len(results) != 2 {
271 t.Fatalf("expected 2 streamed results, but got %d", len(results))
272 }
273
274 // The ranking should be determined by whether it's an exact word match,
275 // followed by repository priority
276 want := []string{"f4", "f3", "f5", "f2", "f1"}
277
278 files := results[1].Files
279 got := make([]string, len(files))
280 for i := 0; i < len(files); i++ {
281 got[i] = files[i].FileName
282 }
283
284 if !reflect.DeepEqual(got, want) {
285 t.Errorf("got %v, want %v", got, want)
286 }
287}
288
289func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
290 ss := newShardedSearcher(1)
291
292 repoSetNames := []string{}
293 repoIDs := []uint32{}
294 n := 10 * runtime.GOMAXPROCS(0)
295 for i := 0; i < n; i++ {
296 shardName := fmt.Sprintf("shard%d", i)
297 repoName := fmt.Sprintf("repository%.3d", i)
298 repoID := hash(repoName)
299
300 if i%3 == 0 {
301 repoSetNames = append(repoSetNames, repoName)
302 repoIDs = append(repoIDs, repoID)
303 }
304
305 ss.replace(map[string]zoekt.Searcher{
306 shardName: &rankSearcher{
307 repo: &zoekt.Repository{ID: repoID, Name: repoName},
308 rank: uint16(n - i),
309 },
310 })
311 }
312
313 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
314 if err != nil {
315 t.Errorf("Search: %v", err)
316 }
317 if len(res.Files) != n {
318 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
319 }
320
321 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
322 {Branch: "HEAD", Repos: roaring.New()},
323 }}
324
325 for _, name := range repoSetNames {
326 branchesRepos.List[0].Repos.Add(hash(name))
327 }
328
329 set := query.NewRepoSet(repoSetNames...)
330 sub := &query.Substring{Pattern: "bla"}
331
332 repoIDsQuery := query.NewRepoIDs(repoIDs...)
333
334 queries := []query.Q{
335 query.NewAnd(set, sub),
336 // Test with the same reposet again
337 query.NewAnd(set, sub),
338
339 query.NewAnd(branchesRepos, sub),
340 // Test with the same repoBranches with IDs again
341 query.NewAnd(branchesRepos, sub),
342
343 query.NewAnd(repoIDsQuery, sub),
344 // Test with the same repoIDs again
345 query.NewAnd(repoIDsQuery, sub),
346 }
347
348 for _, q := range queries {
349 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
350 if err != nil {
351 t.Errorf("Search(%s): %v", q, err)
352 }
353 // Note: Assertion is based on fact that `rankSearcher` always returns a
354 // result and using repoSet will half the number of results
355 if len(res.Files) != len(repoSetNames) {
356 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
357 }
358 }
359}
360
361func hash(name string) uint32 {
362 h := fnv.New32()
363 h.Write([]byte(name))
364 return h.Sum32()
365}
366
367type memSeeker struct {
368 data []byte
369}
370
371func (s *memSeeker) Name() string {
372 return "memseeker"
373}
374
375func (s *memSeeker) Close() {}
376func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
377 return s.data[off : off+sz], nil
378}
379
380func (s *memSeeker) Size() (uint32, error) {
381 return uint32(len(s.data)), nil
382}
383
384func TestUnloadIndex(t *testing.T) {
385 b := testIndexBuilder(t, nil, zoekt.Document{
386 Name: "filename",
387 Content: []byte("needle needle needle"),
388 })
389
390 var buf bytes.Buffer
391 if err := b.Write(&buf); err != nil {
392 t.Fatal(err)
393 }
394 indexBytes := buf.Bytes()
395 indexFile := &memSeeker{indexBytes}
396 searcher, err := zoekt.NewSearcher(indexFile)
397 if err != nil {
398 t.Fatalf("NewSearcher: %v", err)
399 }
400
401 ss := newShardedSearcher(2)
402 ss.replace(map[string]zoekt.Searcher{"key": searcher})
403
404 var opts zoekt.SearchOptions
405 q := &query.Substring{Pattern: "needle"}
406 res, err := ss.Search(context.Background(), q, &opts)
407 if err != nil {
408 t.Fatalf("Search(%s): %v", q, err)
409 }
410
411 forbidden := byte(29)
412 for i := range indexBytes {
413 // non-ASCII
414 indexBytes[i] = forbidden
415 }
416
417 for _, f := range res.Files {
418 if bytes.Contains(f.Content, []byte{forbidden}) {
419 t.Errorf("found %d in content %q", forbidden, f.Content)
420 }
421 if bytes.Contains(f.Checksum, []byte{forbidden}) {
422 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
423 }
424
425 for _, l := range f.LineMatches {
426 if bytes.Contains(l.Line, []byte{forbidden}) {
427 t.Errorf("found %d in line %q", forbidden, l.Line)
428 }
429 }
430 }
431}
432
433func TestShardedSearcher_List(t *testing.T) {
434 repos := []*zoekt.Repository{
435 {
436 ID: 1234,
437 Name: "repo-a",
438 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
439 RawConfig: map[string]string{"repoid": "1234"},
440 },
441 {
442 Name: "repo-b",
443 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
444 },
445 }
446
447 doc := zoekt.Document{
448 Name: "foo.go",
449 Content: []byte("bar\nbaz"),
450 Branches: []string{"main", "dev"},
451 }
452
453 // Test duplicate removal when ListOptions.Minimal is true and false
454 ss := newShardedSearcher(4)
455 ss.replace(map[string]zoekt.Searcher{
456 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)),
457 "2": searcherForTest(t, testIndexBuilder(t, repos[0])),
458 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)),
459 "4": searcherForTest(t, testIndexBuilder(t, repos[1])),
460 })
461 ss.markReady()
462
463 stats := zoekt.RepoStats{
464 Shards: 2,
465 Documents: 1,
466 IndexBytes: 196,
467 ContentBytes: 13,
468 NewLinesCount: 1,
469 DefaultBranchNewLinesCount: 1,
470 OtherBranchesNewLinesCount: 1,
471 }
472
473 aggStats := stats
474 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
475 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
476
477 for _, tc := range []struct {
478 name string
479 opts *zoekt.ListOptions
480 want *zoekt.RepoList
481 }{
482 {
483 name: "nil opts",
484 opts: nil,
485 want: &zoekt.RepoList{
486 Repos: []*zoekt.RepoListEntry{
487 {
488 Repository: *repos[0],
489 Stats: stats,
490 },
491 {
492 Repository: *repos[1],
493 Stats: stats,
494 },
495 },
496 Stats: aggStats,
497 },
498 },
499 {
500 name: "minimal=false",
501 opts: &zoekt.ListOptions{Minimal: false},
502 want: &zoekt.RepoList{
503 Repos: []*zoekt.RepoListEntry{
504 {
505 Repository: *repos[0],
506 Stats: stats,
507 },
508 {
509 Repository: *repos[1],
510 Stats: stats,
511 },
512 },
513 Stats: aggStats,
514 },
515 },
516 {
517 name: "minimal=true",
518 opts: &zoekt.ListOptions{Minimal: true},
519 want: &zoekt.RepoList{
520 Repos: []*zoekt.RepoListEntry{
521 {
522 Repository: *repos[1],
523 Stats: stats,
524 },
525 },
526 Minimal: map[uint32]*zoekt.MinimalRepoListEntry{
527 repos[0].ID: {
528 HasSymbols: repos[0].HasSymbols,
529 Branches: repos[0].Branches,
530 },
531 },
532 Stats: aggStats,
533 },
534 },
535 {
536 name: "field=repos",
537 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
538 want: &zoekt.RepoList{
539 Repos: []*zoekt.RepoListEntry{
540 {
541 Repository: *repos[0],
542 Stats: stats,
543 },
544 {
545 Repository: *repos[1],
546 Stats: stats,
547 },
548 },
549 Stats: aggStats,
550 },
551 },
552 {
553 name: "field=minimal",
554 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldMinimal},
555 want: &zoekt.RepoList{
556 Repos: []*zoekt.RepoListEntry{
557 {
558 Repository: *repos[1],
559 Stats: stats,
560 },
561 },
562 Minimal: map[uint32]*zoekt.MinimalRepoListEntry{
563 repos[0].ID: {
564 HasSymbols: repos[0].HasSymbols,
565 Branches: repos[0].Branches,
566 },
567 },
568 Stats: aggStats,
569 },
570 },
571 {
572 name: "field=reposmap",
573 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
574 want: &zoekt.RepoList{
575 Repos: []*zoekt.RepoListEntry{
576 {
577 Repository: *repos[1],
578 Stats: stats,
579 },
580 },
581 ReposMap: zoekt.ReposMap{
582 repos[0].ID: {
583 HasSymbols: repos[0].HasSymbols,
584 Branches: repos[0].Branches,
585 },
586 },
587 Stats: aggStats,
588 },
589 },
590 } {
591 tc := tc
592 t.Run(tc.name, func(t *testing.T) {
593 t.Parallel()
594
595 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
596
597 res, err := ss.List(context.Background(), q, tc.opts)
598 if err != nil {
599 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
600 }
601
602 sort.Slice(res.Repos, func(i, j int) bool {
603 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
604 })
605
606 ignored := []cmp.Option{
607 cmpopts.EquateEmpty(),
608 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
609 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
610 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
611 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
612 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
613 }
614
615 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
616 t.Fatalf("mismatch (-want +got):\n%s", diff)
617 }
618 })
619 }
620}
621
622func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder {
623 b, err := zoekt.NewIndexBuilder(repo)
624 if err != nil {
625 t.Fatalf("NewIndexBuilder: %v", err)
626 }
627
628 for i, d := range docs {
629 if err := b.Add(d); err != nil {
630 t.Fatalf("Add %d: %v", i, err)
631 }
632 }
633 return b
634}
635
636func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher {
637 var buf bytes.Buffer
638 if err := b.Write(&buf); err != nil {
639 t.Fatal(err)
640 }
641 f := &memSeeker{buf.Bytes()}
642
643 searcher, err := zoekt.NewSearcher(f)
644 if err != nil {
645 t.Fatalf("NewSearcher: %v", err)
646 }
647
648 return searcher
649}
650
651func reposForTest(n int) (result []*zoekt.Repository) {
652 for i := 0; i < n; i++ {
653 result = append(result, &zoekt.Repository{
654 ID: uint32(i + 1),
655 Name: fmt.Sprintf("test-repository-%d", i),
656 })
657 }
658 return result
659}
660
661func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
662 builder := testIndexBuilder(b, r)
663
664 if err := builder.Add(zoekt.Document{
665 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
666 Content: []byte("needle needle needle haystack"),
667 }); err != nil {
668 b.Fatal(err)
669 }
670
671 for i := 1; i < numFiles; i++ {
672 if err := builder.Add(zoekt.Document{
673 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
674 Content: []byte("haystack haystack haystack"),
675 }); err != nil {
676 b.Fatal(err)
677 }
678 }
679
680 return searcherForTest(b, builder)
681}
682
683func BenchmarkShardedSearch(b *testing.B) {
684 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
685
686 filesPerRepo := 300
687 repos := reposForTest(3000)
688 var repoSetIDs []uint32
689
690 shards := make(map[string]zoekt.Searcher, len(repos))
691 for i, r := range repos {
692 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
693 if i%2 == 0 {
694 repoSetIDs = append(repoSetIDs, r.ID)
695 }
696 }
697
698 ss.replace(shards)
699
700 ctx := context.Background()
701 opts := &zoekt.SearchOptions{}
702
703 needleSub := &query.Substring{Pattern: "needle"}
704 haystackSub := &query.Substring{Pattern: "haystack"}
705 helloworldSub := &query.Substring{Pattern: "helloworld"}
706 haystackCap, err := query.Parse("hay(s(t))(a)ck")
707 if err != nil {
708 b.Fatal(err)
709 }
710
711 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
712 if err != nil {
713 b.Fatal(err)
714 }
715
716 setAnd := func(q query.Q) func() query.Q {
717 return func() query.Q {
718 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
719 }
720 }
721
722 search := func(b *testing.B, q query.Q, wantFiles int) {
723 b.Helper()
724
725 res, err := ss.Search(ctx, q, opts)
726 if err != nil {
727 b.Fatalf("Search(%s): %v", q, err)
728 }
729 if have := len(res.Files); have != wantFiles {
730 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
731 }
732 }
733
734 benchmarks := []struct {
735 name string
736 q func() query.Q
737 wantFiles int
738 }{
739 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
740 {"substring no results", func() query.Q { return helloworldSub }, 0},
741 {"substring some results", func() query.Q { return needleSub }, len(repos)},
742
743 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
744 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
745
746 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
747 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
748 {"substring no results and repo set", setAnd(helloworldSub), 0},
749 }
750
751 for _, bb := range benchmarks {
752 b.Run(bb.name, func(b *testing.B) {
753 b.ReportAllocs()
754
755 for n := 0; n < b.N; n++ {
756 q := bb.q()
757
758 search(b, q, bb.wantFiles)
759 }
760 })
761 }
762}
763
764func TestRawQuerySearch(t *testing.T) {
765 ss := newShardedSearcher(1)
766
767 var nextShardNum int
768 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) {
769 r := &zoekt.Repository{Name: repo}
770 r.RawConfig = rawConfig
771 b := testIndexBuilder(t, r, docs...)
772 shard := searcherForTest(t, b)
773 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
774 nextShardNum++
775 }
776 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
777 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
778 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
779
780 cases := []struct {
781 pattern string
782 flags query.RawConfig
783 wantRepos []string
784 wantFiles int
785 }{
786 {
787 pattern: "bas",
788 flags: query.RcOnlyPublic,
789 wantRepos: []string{"public"},
790 wantFiles: 1,
791 },
792 {
793 pattern: "foo",
794 flags: query.RcOnlyPublic,
795 wantRepos: []string{"public", "public_fork"},
796 wantFiles: 2,
797 },
798 {
799 pattern: "foo",
800 flags: query.RcOnlyPublic | query.RcNoForks,
801 wantRepos: []string{"public"},
802 wantFiles: 1,
803 },
804 {
805 pattern: "bar",
806 flags: query.RcOnlyForks,
807 wantRepos: []string{"public_fork"},
808 wantFiles: 1,
809 },
810 {
811 pattern: "bas",
812 flags: query.RcNoArchived,
813 wantRepos: []string{"public"},
814 wantFiles: 1,
815 },
816 {
817 pattern: "foo",
818 flags: query.RcNoForks,
819 wantRepos: []string{"public", "private_archived"},
820 wantFiles: 2,
821 },
822 {
823 pattern: "bas",
824 flags: query.RcOnlyArchived,
825 wantRepos: []string{"private_archived"},
826 wantFiles: 1,
827 },
828 {
829 pattern: "foo",
830 flags: query.RcOnlyPrivate,
831 wantRepos: []string{"private_archived"},
832 wantFiles: 1,
833 },
834 {
835 pattern: "foo",
836 flags: query.RcOnlyPrivate | query.RcNoArchived,
837 wantRepos: []string{},
838 wantFiles: 0,
839 },
840 }
841 for _, c := range cases {
842 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
843 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
844
845 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
846 if err != nil {
847 t.Fatal(err)
848 }
849
850 if got := len(sr.Files); got != c.wantFiles {
851 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
852 }
853
854 if c.wantFiles == 0 {
855 return
856 }
857
858 gotRepos := make([]string, 0, len(sr.RepoURLs))
859 for k := range sr.RepoURLs {
860 gotRepos = append(gotRepos, k)
861 }
862 sort.Strings(gotRepos)
863 sort.Strings(c.wantRepos)
864 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
865 t.Fatalf("(-want, +got):\n%s", d)
866 }
867 })
868 }
869}
870
871func TestPrioritySlice(t *testing.T) {
872 p := &prioritySlice{}
873 for step, oper := range []struct {
874 isAppend bool
875 value float64
876 expectedMax float64
877 }{
878 {true, 1, 1},
879 {true, 3, 3},
880 {true, 2, 3},
881 {false, 1, 3},
882 {false, 3, 2},
883 {false, 2, math.Inf(-1)},
884 } {
885 if oper.isAppend {
886 p.append(oper.value)
887 } else {
888 p.remove(oper.value)
889 }
890 max := p.max()
891 if max != oper.expectedMax {
892 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
893 }
894 }
895}
896
897func TestSendByRepository(t *testing.T) {
898 wantStats := zoekt.Stats{}
899 wantStats.ShardsScanned = 1
900
901 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
902 // test.
903 f := func(n1, n2, n3 uint8) bool {
904
905 sr := createMockSearchResult(n1, n2, n3, wantStats)
906
907 mock := &mockSender{}
908 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
909
910 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
911 t.Logf("-want,+got\n%s", diff)
912 return false
913 }
914
915 nonZero := 0
916 for _, l := range []uint8{n1, n2, n3} {
917 if l > 0 {
918 nonZero++
919 }
920 }
921 if l := len(mock.files); l != nonZero {
922 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
923 return false
924 }
925
926 gotTotal := 0
927 for _, fs := range mock.files {
928 gotTotal += len(fs)
929 }
930 wantTotal := int(n1) + int(n2) + int(n3)
931 if gotTotal != wantTotal {
932 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
933 return false
934 }
935
936 for _, fs := range mock.files {
937 if len(fs) == 0 {
938 t.Logf("got search result with 0 file matches after split")
939 return false
940 }
941 }
942 return true
943 }
944
945 if err := quick.Check(f, nil); err != nil {
946 t.Error(err)
947 }
948}
949
950type mockSender struct {
951 stats zoekt.Stats
952 files [][]zoekt.FileMatch
953}
954
955func (s *mockSender) Send(sr *zoekt.SearchResult) {
956 s.stats.Add(sr.Stats)
957 if len(sr.Files) == 0 {
958 return
959 }
960 s.files = append(s.files, sr.Files)
961}
962
963func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
964 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
965 for i, n := range []uint8{n1, n2, n3} {
966 if n == 0 {
967 continue
968 }
969 tmp := mkSearchResult(int(n), uint32(i))
970 sr.Files = append(sr.Files, tmp.Files...)
971 for k := range tmp.RepoURLs {
972 sr.RepoURLs[k] = ""
973 }
974 }
975 sr.Stats = stats
976 return sr
977}
978
979func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
980 if n == 0 {
981 return &zoekt.SearchResult{}
982 }
983 fm := make([]zoekt.FileMatch, 0, n)
984 for i := 0; i < n; i++ {
985 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
986 }
987
988 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
989}
990
991func TestFileBasedSearch(t *testing.T) {
992 cases := []struct {
993 name string
994 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
995 }{
996 {"Search", testShardedSearch},
997 {"StreamSearch", testShardedStreamSearch},
998 }
999
1000 c1 := []byte("I love bananas without skin")
1001 // -----------0123456789012345678901234567890123456789
1002 c2 := []byte("In Dutch, ananas means pineapple")
1003 // -----------0123456789012345678901234567890123456789
1004 b := testIndexBuilder(t, nil,
1005 zoekt.Document{Name: "f1", Content: c1},
1006 zoekt.Document{Name: "f2", Content: c2},
1007 )
1008
1009 for _, tt := range cases {
1010 for _, useDocumentRanks := range []bool{false, true} {
1011 t.Run(tt.name, func(t *testing.T) {
1012 matches := tt.testShardedSearch(t, &query.Substring{
1013 CaseSensitive: false,
1014 Pattern: "ananas",
1015 }, b, useDocumentRanks)
1016
1017 if len(matches) != 2 {
1018 t.Fatalf("got %v, want 2 matches", matches)
1019 }
1020 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
1021 t.Fatalf("got %v, want matches {f1,f2}", matches)
1022 }
1023 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1024 t.Fatalf("got %#v, want offsets 10,8", matches)
1025 }
1026 })
1027 }
1028 }
1029}
1030
1031func TestWordBoundaryRanking(t *testing.T) {
1032 cases := []struct {
1033 name string
1034 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1035 }{
1036 {"Search", testShardedSearch},
1037 {"StreamSearch", testShardedStreamSearch},
1038 }
1039
1040 b := testIndexBuilder(t, nil,
1041 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1042 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1043 // -----------------------------------------0123456 789012 34567890
1044 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1045
1046 for _, tt := range cases {
1047 for _, useDocumentRanks := range []bool{false, true} {
1048 t.Run(tt.name, func(t *testing.T) {
1049 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1050
1051 if len(files) != 3 {
1052 t.Fatalf("got %#v, want 3 files", files)
1053 }
1054
1055 file0 := files[0]
1056 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1057 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1058 }
1059
1060 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1061 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1062 }
1063 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1064 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1065 }
1066 })
1067 }
1068 }
1069}
1070
1071func TestAtomCountScore(t *testing.T) {
1072 cases := []struct {
1073 name string
1074 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1075 }{
1076 {"Search", testShardedSearch},
1077 {"StreamSearch", testShardedStreamSearch},
1078 }
1079
1080 b := testIndexBuilder(t,
1081 &zoekt.Repository{
1082 Branches: []zoekt.RepositoryBranch{
1083 {Name: "branches", Version: "v1"},
1084 {Name: "needle", Version: "v2"},
1085 },
1086 },
1087 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1088 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1089 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1090
1091 for _, tt := range cases {
1092 for _, useDocumentRanks := range []bool{false, true} {
1093 t.Run(tt.name, func(t *testing.T) {
1094 files := tt.testShardedSearch(t,
1095 query.NewOr(
1096 &query.Substring{Pattern: "needle"},
1097 &query.Substring{Pattern: "needle", FileName: true},
1098 &query.Branch{Pattern: "needle"},
1099 ), b, useDocumentRanks)
1100 var got []string
1101 for _, f := range files {
1102 got = append(got, f.FileName)
1103 }
1104 want := []string{"needle-file-branch", "needle-file", "f1"}
1105 if !reflect.DeepEqual(got, want) {
1106 t.Errorf("got %v, want %v", got, want)
1107 }
1108 })
1109 }
1110 }
1111}
1112
1113func TestUseKeywordScoring(t *testing.T) {
1114 b := testIndexBuilder(t,
1115 &zoekt.Repository{},
1116 zoekt.Document{Name: "f1", Content: []byte("one two two three")},
1117 zoekt.Document{Name: "f2", Content: []byte("one two one two")},
1118 zoekt.Document{Name: "f3", Content: []byte("one three three three")})
1119
1120 ss := newShardedSearcher(1)
1121 searcher := searcherForTest(t, b)
1122 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1123
1124 q := query.NewOr(
1125 &query.Substring{Pattern: "one"},
1126 &query.Substring{Pattern: "three"})
1127
1128 opts := zoekt.SearchOptions{
1129 UseKeywordScoring: true,
1130 }
1131
1132 results, err := ss.Search(context.Background(), q, &opts)
1133 if err != nil {
1134 t.Fatal(err)
1135 }
1136
1137 var got []string
1138 for _, f := range results.Files {
1139 got = append(got, f.FileName)
1140 }
1141
1142 want := []string{"f3", "f1", "f2"}
1143 if !reflect.DeepEqual(got, want) {
1144 t.Errorf("got %v, want %v", got, want)
1145 }
1146}
1147
1148func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1149 ss := newShardedSearcher(1)
1150 searcher := searcherForTest(t, ib)
1151 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1152
1153 var files []zoekt.FileMatch
1154 sender := stream.SenderFunc(func(result *zoekt.SearchResult) {
1155 files = append(files, result.Files...)
1156 })
1157
1158 opts := zoekt.SearchOptions{}
1159 if useDocumentRanks {
1160 opts.UseDocumentRanks = true
1161 opts.FlushWallTime = 10 * time.Millisecond
1162 }
1163 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1164 t.Fatal(err)
1165 }
1166 return files
1167}
1168
1169func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1170 ss := newShardedSearcher(1)
1171 searcher := searcherForTest(t, ib)
1172 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1173
1174 opts := zoekt.SearchOptions{}
1175 if useDocumentRanks {
1176 opts.UseDocumentRanks = true
1177 opts.FlushWallTime = 50 * time.Millisecond
1178 }
1179 sres, _ := ss.Search(context.Background(), q, &opts)
1180 return sres.Files
1181}
1182
1183// Ensure we work on empty shard directories.
1184func TestNewDirectorySearcher_empty(t *testing.T) {
1185 ctx := context.Background()
1186
1187 test := func(t *testing.T, ss zoekt.Streamer) {
1188 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1189 if err != nil {
1190 t.Fatal("Search non-nil error", err)
1191 }
1192
1193 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1194 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1195 }
1196
1197 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1198 if err != nil {
1199 t.Fatal("List non-nil error", err)
1200 }
1201 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1202 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1203 }
1204 }
1205
1206 dir := t.TempDir()
1207 t.Run("blocking", func(t *testing.T) {
1208 ss, err := NewDirectorySearcher(dir)
1209 if err != nil {
1210 t.Fatal(err)
1211 }
1212 t.Cleanup(ss.Close)
1213
1214 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1215 // so we can validate straight away.
1216 test(t, ss)
1217 })
1218
1219 t.Run("fast", func(t *testing.T) {
1220 ss, err := NewDirectorySearcherFast(dir)
1221 if err != nil {
1222 t.Fatal(err)
1223 }
1224 t.Cleanup(ss.Close)
1225
1226 deadline := testDeadline(t, 10*time.Second)
1227
1228 // Wait for scanning of directory to be done. We should be returning
1229 // non-zero crashes until then.
1230 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1231 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1232 if err != nil {
1233 t.Fatal(err)
1234 }
1235 return res.Stats.Crashes == 0
1236 })
1237
1238 test(t, ss)
1239 })
1240}
1241
1242// testDeadline returns the deadline for t, but ensures it is no longer than
1243// maxTimeout away.
1244func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1245 deadline := time.Now().Add(maxTimeout)
1246 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1247 // give 1s for us to do a final test run
1248 deadline = d.Add(-time.Second)
1249 }
1250 return deadline
1251}
1252
1253func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1254 for time.Now().Before(deadline) {
1255 if pred() {
1256 return true
1257 }
1258
1259 time.Sleep(tick)
1260 }
1261
1262 return pred()
1263}