fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package shards
16
17import (
18 "bytes"
19 "context"
20 "fmt"
21 "hash/fnv"
22 "log"
23 "math"
24 "os"
25 "reflect"
26 "runtime"
27 "sort"
28 "strconv"
29 "testing"
30 "testing/quick"
31 "time"
32
33 "github.com/RoaringBitmap/roaring"
34 "github.com/google/go-cmp/cmp"
35 "github.com/google/go-cmp/cmp/cmpopts"
36 "github.com/grafana/regexp"
37
38 "github.com/sourcegraph/zoekt"
39 "github.com/sourcegraph/zoekt/query"
40 "github.com/sourcegraph/zoekt/stream"
41)
42
43type crashSearcher struct{}
44
45func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
46 panic("search")
47}
48
49func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
50 panic("list")
51}
52
53func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) {
54 return &zoekt.RepoStats{}, nil
55}
56
57func (s *crashSearcher) Close() {}
58
59func (s *crashSearcher) String() string { return "crashSearcher" }
60
61func TestCrashResilience(t *testing.T) {
62 out := &bytes.Buffer{}
63 log.SetOutput(out)
64 defer log.SetOutput(os.Stderr)
65 ss := newShardedSearcher(2)
66 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}})
67
68 var wantCrashes int
69 test := func(t *testing.T) {
70 q := &query.Substring{Pattern: "hoi"}
71 opts := &zoekt.SearchOptions{}
72 if res, err := ss.Search(context.Background(), q, opts); err != nil {
73 t.Fatalf("Search: %v", err)
74 } else if res.Stats.Crashes != wantCrashes {
75 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes)
76 }
77
78 if res, err := ss.List(context.Background(), q, nil); err != nil {
79 t.Fatalf("List: %v", err)
80 } else if res.Crashes != wantCrashes {
81 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes)
82 }
83 }
84
85 // Before we are marked as ready we have one extra crash
86 wantCrashes = 2
87 t.Run("loading", test)
88
89 // After marking as ready we should only have the crashSearcher
90 // contributing.
91 ss.markReady()
92 wantCrashes = 1
93 t.Run("ready", test)
94}
95
96type rankSearcher struct {
97 rank uint16
98 repo *zoekt.Repository
99}
100
101func (s *rankSearcher) Close() {
102}
103
104func (s *rankSearcher) String() string {
105 return ""
106}
107
108func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) {
109 select {
110 case <-ctx.Done():
111 return &zoekt.SearchResult{}, nil
112 default:
113 }
114
115 // Ugly, but without sleep it's too fast, and we can't
116 // simulate the cutoff.
117 time.Sleep(time.Millisecond)
118 return &zoekt.SearchResult{
119 Files: []zoekt.FileMatch{
120 {
121 FileName: fmt.Sprintf("f%d", s.rank),
122 Score: float64(s.rank),
123 },
124 },
125 Stats: zoekt.Stats{
126 MatchCount: 1,
127 },
128 }, nil
129}
130
131func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) {
132 r := zoekt.Repository{}
133 if s.repo != nil {
134 r = *s.repo
135 }
136 r.Rank = s.rank
137 return &zoekt.RepoList{
138 Repos: []*zoekt.RepoListEntry{
139 {Repository: r},
140 },
141 }, nil
142}
143
144func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo }
145
146func TestOrderByShard(t *testing.T) {
147 ss := newShardedSearcher(1)
148
149 n := 10 * runtime.GOMAXPROCS(0)
150 for i := 0; i < n; i++ {
151 ss.replace(map[string]zoekt.Searcher{
152 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)},
153 })
154 }
155
156 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil {
157 t.Errorf("Search: %v", err)
158 } else if len(res.Files) != n {
159 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n)
160 }
161
162 opts := zoekt.SearchOptions{
163 TotalMaxMatchCount: 3,
164 }
165 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts)
166 if err != nil {
167 t.Errorf("Search: %v", err)
168 }
169
170 if len(res.Files) < opts.TotalMaxMatchCount {
171 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount)
172 }
173 if len(res.Files) == n {
174 t.Errorf("got %d results, want < %d", len(res.Files), n)
175 }
176 for i, f := range res.Files {
177 rev := n - 1 - i
178 want := fmt.Sprintf("f%d", rev)
179 got := f.FileName
180
181 if got != want {
182 t.Logf("%d: got %q, want %q", i, got, want)
183 }
184 }
185}
186
187func TestShardedSearcher_Ranking(t *testing.T) {
188 ss := newShardedSearcher(1)
189
190 var nextShardNum int
191 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
192 r := &zoekt.Repository{ID: hash(repo), Name: repo}
193 r.RawConfig = map[string]string{
194 "public": "1",
195 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
196 }
197 b := testIndexBuilder(t, r, docs...)
198 shard := searcherForTest(t, b)
199 ss.replace(map[string]zoekt.Searcher{
200 fmt.Sprintf("key-%d", nextShardNum): shard,
201 })
202 nextShardNum++
203 }
204
205 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
206 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
207 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
208 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
209
210 want := []string{
211 "super-star",
212 "moderately-popular",
213 "weekend-project",
214 "weekend-project-2",
215 }
216
217 var have []string
218 for _, s := range ss.getLoaded().shards {
219 for _, r := range s.repos {
220 have = append(have, r.Name)
221 }
222 }
223
224 if !reflect.DeepEqual(want, have) {
225 t.Fatalf("\nwant: %s\nhave: %s", want, have)
226 }
227}
228
229func TestShardedSearcher_DocumentRanking(t *testing.T) {
230 ss := newShardedSearcher(1)
231
232 var nextShardNum int
233 addShard := func(repo string, priority float64, docs ...zoekt.Document) {
234 r := &zoekt.Repository{ID: hash(repo), Name: repo}
235 r.RawConfig = map[string]string{
236 "public": "1",
237 "priority": strconv.FormatFloat(priority, 'f', 2, 64),
238 }
239 b := testIndexBuilder(t, r, docs...)
240 shard := searcherForTest(t, b)
241 ss.replace(map[string]zoekt.Searcher{
242 fmt.Sprintf("key-%d", nextShardNum): shard,
243 })
244 nextShardNum++
245 }
246
247 addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")})
248 addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")})
249 addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
250 addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")},
251 zoekt.Document{Name: "f5", Content: []byte("fooooo")})
252
253 // Run a stream search and gather the results
254 var results []*zoekt.SearchResult
255 opts := &zoekt.SearchOptions{
256 UseDocumentRanks: true,
257 FlushWallTime: 100 * time.Millisecond,
258 }
259
260 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts,
261 stream.SenderFunc(func(event *zoekt.SearchResult) {
262 results = append(results, event)
263 }))
264 if err != nil {
265 t.Fatal(err)
266 }
267
268 // There should always be two stream results, first progress-only, then the file results
269 if len(results) != 2 {
270 t.Fatalf("expected 2 streamed results, but got %d", len(results))
271 }
272
273 // The ranking should be determined by whether it's an exact word match,
274 // followed by repository priority
275 want := []string{"f4", "f3", "f5", "f2", "f1"}
276
277 files := results[1].Files
278 got := make([]string, len(files))
279 for i := 0; i < len(files); i++ {
280 got[i] = files[i].FileName
281 }
282
283 if !reflect.DeepEqual(got, want) {
284 t.Errorf("got %v, want %v", got, want)
285 }
286}
287
288func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) {
289 ss := newShardedSearcher(1)
290
291 // namePrefix is so we can create a repo:foo filter and match the same set
292 // of repos.
293 namePrefix := [3]string{"foo", "bar", "baz"}
294
295 repoSetNames := []string{}
296 repoIDs := []uint32{}
297 n := 10 * runtime.GOMAXPROCS(0)
298 for i := 0; i < n; i++ {
299 shardName := fmt.Sprintf("shard%d", i)
300 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i)
301 repoID := hash(repoName)
302
303 if i%3 == 0 {
304 repoSetNames = append(repoSetNames, repoName)
305 repoIDs = append(repoIDs, repoID)
306 }
307
308 ss.replace(map[string]zoekt.Searcher{
309 shardName: &rankSearcher{
310 repo: &zoekt.Repository{ID: repoID, Name: repoName},
311 rank: uint16(n - i),
312 },
313 })
314 }
315
316 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{})
317 if err != nil {
318 t.Errorf("Search: %v", err)
319 }
320 if len(res.Files) != n {
321 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n)
322 }
323
324 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{
325 {Branch: "HEAD", Repos: roaring.New()},
326 }}
327
328 for _, name := range repoSetNames {
329 branchesRepos.List[0].Repos.Add(hash(name))
330 }
331
332 set := query.NewRepoSet(repoSetNames...)
333 sub := &query.Substring{Pattern: "bla"}
334
335 repoIDsQuery := query.NewRepoIDs(repoIDs...)
336 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")}
337
338 queries := []query.Q{
339 query.NewAnd(set, sub),
340 // Test with the same reposet again
341 query.NewAnd(set, sub),
342
343 query.NewAnd(branchesRepos, sub),
344 // Test with the same repoBranches with IDs again
345 query.NewAnd(branchesRepos, sub),
346
347 query.NewAnd(repoIDsQuery, sub),
348 // Test with the same repoIDs again
349 query.NewAnd(repoIDsQuery, sub),
350
351 query.NewAnd(repoQuery, sub),
352 query.NewAnd(repoQuery, sub),
353
354 // List has queries which are just the reposet atoms. We also test twice.
355 set,
356 set,
357 branchesRepos,
358 branchesRepos,
359 repoIDsQuery,
360 repoIDsQuery,
361 repoQuery,
362 repoQuery,
363 }
364
365 for _, q := range queries {
366 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{})
367 if err != nil {
368 t.Errorf("Search(%s): %v", q, err)
369 }
370 // Note: Assertion is based on fact that `rankSearcher` always returns a
371 // result and using repoSet will half the number of results
372 if len(res.Files) != len(repoSetNames) {
373 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames))
374 }
375 }
376}
377
378func hash(name string) uint32 {
379 h := fnv.New32()
380 h.Write([]byte(name))
381 return h.Sum32()
382}
383
384type memSeeker struct {
385 data []byte
386}
387
388func (s *memSeeker) Name() string {
389 return "memseeker"
390}
391
392func (s *memSeeker) Close() {}
393func (s *memSeeker) Read(off, sz uint32) ([]byte, error) {
394 return s.data[off : off+sz], nil
395}
396
397func (s *memSeeker) Size() (uint32, error) {
398 return uint32(len(s.data)), nil
399}
400
401func TestUnloadIndex(t *testing.T) {
402 b := testIndexBuilder(t, nil, zoekt.Document{
403 Name: "filename",
404 Content: []byte("needle needle needle"),
405 })
406
407 var buf bytes.Buffer
408 if err := b.Write(&buf); err != nil {
409 t.Fatal(err)
410 }
411 indexBytes := buf.Bytes()
412 indexFile := &memSeeker{indexBytes}
413 searcher, err := zoekt.NewSearcher(indexFile)
414 if err != nil {
415 t.Fatalf("NewSearcher: %v", err)
416 }
417
418 ss := newShardedSearcher(2)
419 ss.replace(map[string]zoekt.Searcher{"key": searcher})
420
421 var opts zoekt.SearchOptions
422 q := &query.Substring{Pattern: "needle"}
423 res, err := ss.Search(context.Background(), q, &opts)
424 if err != nil {
425 t.Fatalf("Search(%s): %v", q, err)
426 }
427
428 forbidden := byte(29)
429 for i := range indexBytes {
430 // non-ASCII
431 indexBytes[i] = forbidden
432 }
433
434 for _, f := range res.Files {
435 if bytes.Contains(f.Content, []byte{forbidden}) {
436 t.Errorf("found %d in content %q", forbidden, f.Content)
437 }
438 if bytes.Contains(f.Checksum, []byte{forbidden}) {
439 t.Errorf("found %d in checksum %q", forbidden, f.Checksum)
440 }
441
442 for _, l := range f.LineMatches {
443 if bytes.Contains(l.Line, []byte{forbidden}) {
444 t.Errorf("found %d in line %q", forbidden, l.Line)
445 }
446 }
447 }
448}
449
450func TestShardedSearcher_List(t *testing.T) {
451 repos := []*zoekt.Repository{
452 {
453 ID: 1234,
454 Name: "repo-a",
455 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
456 RawConfig: map[string]string{"repoid": "1234"},
457 },
458 {
459 Name: "repo-b",
460 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}},
461 },
462 }
463
464 doc := zoekt.Document{
465 Name: "foo.go",
466 Content: []byte("bar\nbaz"),
467 Branches: []string{"main", "dev"},
468 }
469
470 // Test duplicate removal when ListOptions.Minimal is true and false
471 ss := newShardedSearcher(4)
472 ss.replace(map[string]zoekt.Searcher{
473 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)),
474 "2": searcherForTest(t, testIndexBuilder(t, repos[0])),
475 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)),
476 "4": searcherForTest(t, testIndexBuilder(t, repos[1])),
477 })
478 ss.markReady()
479
480 stats := zoekt.RepoStats{
481 Shards: 2,
482 Documents: 1,
483 IndexBytes: 196,
484 ContentBytes: 13,
485 NewLinesCount: 1,
486 DefaultBranchNewLinesCount: 1,
487 OtherBranchesNewLinesCount: 1,
488 }
489
490 aggStats := stats
491 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works
492 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes.
493
494 for _, tc := range []struct {
495 name string
496 opts *zoekt.ListOptions
497 want *zoekt.RepoList
498 }{
499 {
500 name: "nil opts",
501 opts: nil,
502 want: &zoekt.RepoList{
503 Repos: []*zoekt.RepoListEntry{
504 {
505 Repository: *repos[0],
506 Stats: stats,
507 },
508 {
509 Repository: *repos[1],
510 Stats: stats,
511 },
512 },
513 Stats: aggStats,
514 },
515 },
516 {
517 name: "default",
518 opts: &zoekt.ListOptions{},
519 want: &zoekt.RepoList{
520 Repos: []*zoekt.RepoListEntry{
521 {
522 Repository: *repos[0],
523 Stats: stats,
524 },
525 {
526 Repository: *repos[1],
527 Stats: stats,
528 },
529 },
530 Stats: aggStats,
531 },
532 },
533 {
534 name: "field=repos",
535 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos},
536 want: &zoekt.RepoList{
537 Repos: []*zoekt.RepoListEntry{
538 {
539 Repository: *repos[0],
540 Stats: stats,
541 },
542 {
543 Repository: *repos[1],
544 Stats: stats,
545 },
546 },
547 Stats: aggStats,
548 },
549 },
550 {
551 name: "field=reposmap",
552 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap},
553 want: &zoekt.RepoList{
554 Repos: []*zoekt.RepoListEntry{
555 {
556 Repository: *repos[1],
557 Stats: stats,
558 },
559 },
560 ReposMap: zoekt.ReposMap{
561 repos[0].ID: {
562 HasSymbols: repos[0].HasSymbols,
563 Branches: repos[0].Branches,
564 },
565 },
566 Stats: aggStats,
567 },
568 },
569 } {
570 tc := tc
571 t.Run(tc.name, func(t *testing.T) {
572 t.Parallel()
573
574 q := &query.Repo{Regexp: regexp.MustCompile("repo")}
575
576 res, err := ss.List(context.Background(), q, tc.opts)
577 if err != nil {
578 t.Fatalf("List(%v, %s): %v", q, tc.opts, err)
579 }
580
581 sort.Slice(res.Repos, func(i, j int) bool {
582 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name
583 })
584
585 ignored := []cmp.Option{
586 cmpopts.EquateEmpty(),
587 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"),
588 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"),
589 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"),
590 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"),
591 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"),
592 }
593
594 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" {
595 t.Fatalf("mismatch (-want +got):\n%s", diff)
596 }
597 })
598 }
599}
600
601func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder {
602 b, err := zoekt.NewIndexBuilder(repo)
603 if err != nil {
604 t.Fatalf("NewIndexBuilder: %v", err)
605 }
606
607 for i, d := range docs {
608 if err := b.Add(d); err != nil {
609 t.Fatalf("Add %d: %v", i, err)
610 }
611 }
612 return b
613}
614
615func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher {
616 var buf bytes.Buffer
617 if err := b.Write(&buf); err != nil {
618 t.Fatal(err)
619 }
620 f := &memSeeker{buf.Bytes()}
621
622 searcher, err := zoekt.NewSearcher(f)
623 if err != nil {
624 t.Fatalf("NewSearcher: %v", err)
625 }
626
627 return searcher
628}
629
630func reposForTest(n int) (result []*zoekt.Repository) {
631 for i := 0; i < n; i++ {
632 result = append(result, &zoekt.Repository{
633 ID: uint32(i + 1),
634 Name: fmt.Sprintf("test-repository-%d", i),
635 })
636 }
637 return result
638}
639
640func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher {
641 builder := testIndexBuilder(b, r)
642
643 if err := builder.Add(zoekt.Document{
644 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0),
645 Content: []byte("needle needle needle haystack"),
646 }); err != nil {
647 b.Fatal(err)
648 }
649
650 for i := 1; i < numFiles; i++ {
651 if err := builder.Add(zoekt.Document{
652 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i),
653 Content: []byte("haystack haystack haystack"),
654 }); err != nil {
655 b.Fatal(err)
656 }
657 }
658
659 return searcherForTest(b, builder)
660}
661
662func BenchmarkShardedSearch(b *testing.B) {
663 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0)))
664
665 filesPerRepo := 300
666 repos := reposForTest(3000)
667 var repoSetIDs []uint32
668
669 shards := make(map[string]zoekt.Searcher, len(repos))
670 for i, r := range repos {
671 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo)
672 if i%2 == 0 {
673 repoSetIDs = append(repoSetIDs, r.ID)
674 }
675 }
676
677 ss.replace(shards)
678
679 ctx := context.Background()
680 opts := &zoekt.SearchOptions{}
681
682 needleSub := &query.Substring{Pattern: "needle"}
683 haystackSub := &query.Substring{Pattern: "haystack"}
684 helloworldSub := &query.Substring{Pattern: "helloworld"}
685 haystackCap, err := query.Parse("hay(s(t))(a)ck")
686 if err != nil {
687 b.Fatal(err)
688 }
689
690 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck")
691 if err != nil {
692 b.Fatal(err)
693 }
694
695 setAnd := func(q query.Q) func() query.Q {
696 return func() query.Q {
697 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q)
698 }
699 }
700
701 search := func(b *testing.B, q query.Q, wantFiles int) {
702 b.Helper()
703
704 res, err := ss.Search(ctx, q, opts)
705 if err != nil {
706 b.Fatalf("Search(%s): %v", q, err)
707 }
708 if have := len(res.Files); have != wantFiles {
709 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles)
710 }
711 }
712
713 benchmarks := []struct {
714 name string
715 q func() query.Q
716 wantFiles int
717 }{
718 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo},
719 {"substring no results", func() query.Q { return helloworldSub }, 0},
720 {"substring some results", func() query.Q { return needleSub }, len(repos)},
721
722 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo},
723 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo},
724
725 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo},
726 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)},
727 {"substring no results and repo set", setAnd(helloworldSub), 0},
728 }
729
730 for _, bb := range benchmarks {
731 b.Run(bb.name, func(b *testing.B) {
732 b.ReportAllocs()
733
734 for n := 0; n < b.N; n++ {
735 q := bb.q()
736
737 search(b, q, bb.wantFiles)
738 }
739 })
740 }
741}
742
743func TestRawQuerySearch(t *testing.T) {
744 ss := newShardedSearcher(1)
745
746 var nextShardNum int
747 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) {
748 r := &zoekt.Repository{Name: repo}
749 r.RawConfig = rawConfig
750 b := testIndexBuilder(t, r, docs...)
751 shard := searcherForTest(t, b)
752 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard})
753 nextShardNum++
754 }
755 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")})
756 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")})
757 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")})
758
759 cases := []struct {
760 pattern string
761 flags query.RawConfig
762 wantRepos []string
763 wantFiles int
764 }{
765 {
766 pattern: "bas",
767 flags: query.RcOnlyPublic,
768 wantRepos: []string{"public"},
769 wantFiles: 1,
770 },
771 {
772 pattern: "foo",
773 flags: query.RcOnlyPublic,
774 wantRepos: []string{"public", "public_fork"},
775 wantFiles: 2,
776 },
777 {
778 pattern: "foo",
779 flags: query.RcOnlyPublic | query.RcNoForks,
780 wantRepos: []string{"public"},
781 wantFiles: 1,
782 },
783 {
784 pattern: "bar",
785 flags: query.RcOnlyForks,
786 wantRepos: []string{"public_fork"},
787 wantFiles: 1,
788 },
789 {
790 pattern: "bas",
791 flags: query.RcNoArchived,
792 wantRepos: []string{"public"},
793 wantFiles: 1,
794 },
795 {
796 pattern: "foo",
797 flags: query.RcNoForks,
798 wantRepos: []string{"public", "private_archived"},
799 wantFiles: 2,
800 },
801 {
802 pattern: "bas",
803 flags: query.RcOnlyArchived,
804 wantRepos: []string{"private_archived"},
805 wantFiles: 1,
806 },
807 {
808 pattern: "foo",
809 flags: query.RcOnlyPrivate,
810 wantRepos: []string{"private_archived"},
811 wantFiles: 1,
812 },
813 {
814 pattern: "foo",
815 flags: query.RcOnlyPrivate | query.RcNoArchived,
816 wantRepos: []string{},
817 wantFiles: 0,
818 },
819 }
820 for _, c := range cases {
821 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) {
822 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags)
823
824 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{})
825 if err != nil {
826 t.Fatal(err)
827 }
828
829 if got := len(sr.Files); got != c.wantFiles {
830 t.Fatalf("wanted %d, got %d", c.wantFiles, got)
831 }
832
833 if c.wantFiles == 0 {
834 return
835 }
836
837 gotRepos := make([]string, 0, len(sr.RepoURLs))
838 for k := range sr.RepoURLs {
839 gotRepos = append(gotRepos, k)
840 }
841 sort.Strings(gotRepos)
842 sort.Strings(c.wantRepos)
843 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" {
844 t.Fatalf("(-want, +got):\n%s", d)
845 }
846 })
847 }
848}
849
850func TestPrioritySlice(t *testing.T) {
851 p := &prioritySlice{}
852 for step, oper := range []struct {
853 isAppend bool
854 value float64
855 expectedMax float64
856 }{
857 {true, 1, 1},
858 {true, 3, 3},
859 {true, 2, 3},
860 {false, 1, 3},
861 {false, 3, 2},
862 {false, 2, math.Inf(-1)},
863 } {
864 if oper.isAppend {
865 p.append(oper.value)
866 } else {
867 p.remove(oper.value)
868 }
869 max := p.max()
870 if max != oper.expectedMax {
871 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax)
872 }
873 }
874}
875
876func TestSendByRepository(t *testing.T) {
877 wantStats := zoekt.Stats{}
878 wantStats.ShardsScanned = 1
879
880 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this
881 // test.
882 f := func(n1, n2, n3 uint8) bool {
883 sr := createMockSearchResult(n1, n2, n3, wantStats)
884
885 mock := &mockSender{}
886 sendByRepository(sr, &zoekt.SearchOptions{}, mock)
887
888 if diff := cmp.Diff(wantStats, mock.stats); diff != "" {
889 t.Logf("-want,+got\n%s", diff)
890 return false
891 }
892
893 nonZero := 0
894 for _, l := range []uint8{n1, n2, n3} {
895 if l > 0 {
896 nonZero++
897 }
898 }
899 if l := len(mock.files); l != nonZero {
900 t.Logf("wanted results from %d repositores, got %d", nonZero, l)
901 return false
902 }
903
904 gotTotal := 0
905 for _, fs := range mock.files {
906 gotTotal += len(fs)
907 }
908 wantTotal := int(n1) + int(n2) + int(n3)
909 if gotTotal != wantTotal {
910 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal)
911 return false
912 }
913
914 for _, fs := range mock.files {
915 if len(fs) == 0 {
916 t.Logf("got search result with 0 file matches after split")
917 return false
918 }
919 }
920 return true
921 }
922
923 if err := quick.Check(f, nil); err != nil {
924 t.Error(err)
925 }
926}
927
928type mockSender struct {
929 stats zoekt.Stats
930 files [][]zoekt.FileMatch
931}
932
933func (s *mockSender) Send(sr *zoekt.SearchResult) {
934 s.stats.Add(sr.Stats)
935 if len(sr.Files) == 0 {
936 return
937 }
938 s.files = append(s.files, sr.Files)
939}
940
941func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult {
942 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)}
943 for i, n := range []uint8{n1, n2, n3} {
944 if n == 0 {
945 continue
946 }
947 tmp := mkSearchResult(int(n), uint32(i))
948 sr.Files = append(sr.Files, tmp.Files...)
949 for k := range tmp.RepoURLs {
950 sr.RepoURLs[k] = ""
951 }
952 }
953 sr.Stats = stats
954 return sr
955}
956
957func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult {
958 if n == 0 {
959 return &zoekt.SearchResult{}
960 }
961 fm := make([]zoekt.FileMatch, 0, n)
962 for i := 0; i < n; i++ {
963 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID})
964 }
965
966 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}}
967}
968
969func TestFileBasedSearch(t *testing.T) {
970 cases := []struct {
971 name string
972 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
973 }{
974 {"Search", testShardedSearch},
975 {"StreamSearch", testShardedStreamSearch},
976 }
977
978 c1 := []byte("I love bananas without skin")
979 // -----------0123456789012345678901234567890123456789
980 c2 := []byte("In Dutch, ananas means pineapple")
981 // -----------0123456789012345678901234567890123456789
982 b := testIndexBuilder(t, nil,
983 zoekt.Document{Name: "f1", Content: c1},
984 zoekt.Document{Name: "f2", Content: c2},
985 )
986
987 for _, tt := range cases {
988 for _, useDocumentRanks := range []bool{false, true} {
989 t.Run(tt.name, func(t *testing.T) {
990 matches := tt.testShardedSearch(t, &query.Substring{
991 CaseSensitive: false,
992 Pattern: "ananas",
993 }, b, useDocumentRanks)
994
995 if len(matches) != 2 {
996 t.Fatalf("got %v, want 2 matches", matches)
997 }
998 if matches[0].FileName != "f2" || matches[1].FileName != "f1" {
999 t.Fatalf("got %v, want matches {f1,f2}", matches)
1000 }
1001 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 {
1002 t.Fatalf("got %#v, want offsets 10,8", matches)
1003 }
1004 })
1005 }
1006 }
1007}
1008
1009func TestWordBoundaryRanking(t *testing.T) {
1010 cases := []struct {
1011 name string
1012 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1013 }{
1014 {"Search", testShardedSearch},
1015 {"StreamSearch", testShardedStreamSearch},
1016 }
1017
1018 b := testIndexBuilder(t, nil,
1019 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")},
1020 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")},
1021 // -----------------------------------------0123456 789012 34567890
1022 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")})
1023
1024 for _, tt := range cases {
1025 for _, useDocumentRanks := range []bool{false, true} {
1026 t.Run(tt.name, func(t *testing.T) {
1027 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks)
1028
1029 if len(files) != 3 {
1030 t.Fatalf("got %#v, want 3 files", files)
1031 }
1032
1033 file0 := files[0]
1034 if file0.FileName != "f2" || len(file0.LineMatches) != 3 {
1035 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0)
1036 }
1037
1038 if file0.LineMatches[0].LineFragments[0].Offset != 13 {
1039 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0])
1040 }
1041 if file0.LineMatches[1].LineFragments[0].Offset != 7 {
1042 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1])
1043 }
1044 })
1045 }
1046 }
1047}
1048
1049func TestAtomCountScore(t *testing.T) {
1050 cases := []struct {
1051 name string
1052 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch
1053 }{
1054 {"Search", testShardedSearch},
1055 {"StreamSearch", testShardedStreamSearch},
1056 }
1057
1058 b := testIndexBuilder(t,
1059 &zoekt.Repository{
1060 Branches: []zoekt.RepositoryBranch{
1061 {Name: "branches", Version: "v1"},
1062 {Name: "needle", Version: "v2"},
1063 },
1064 },
1065 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}},
1066 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}},
1067 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}})
1068
1069 for _, tt := range cases {
1070 for _, useDocumentRanks := range []bool{false, true} {
1071 t.Run(tt.name, func(t *testing.T) {
1072 files := tt.testShardedSearch(t,
1073 query.NewOr(
1074 &query.Substring{Pattern: "needle"},
1075 &query.Substring{Pattern: "needle", FileName: true},
1076 &query.Branch{Pattern: "needle"},
1077 ), b, useDocumentRanks)
1078 var got []string
1079 for _, f := range files {
1080 got = append(got, f.FileName)
1081 }
1082 want := []string{"needle-file-branch", "needle-file", "f1"}
1083 if !reflect.DeepEqual(got, want) {
1084 t.Errorf("got %v, want %v", got, want)
1085 }
1086 })
1087 }
1088 }
1089}
1090
1091func TestUseKeywordScoring(t *testing.T) {
1092 b := testIndexBuilder(t,
1093 &zoekt.Repository{},
1094 zoekt.Document{Name: "f1", Content: []byte("one two two three")},
1095 zoekt.Document{Name: "f2", Content: []byte("one two one two")},
1096 zoekt.Document{Name: "f3", Content: []byte("one three three three")})
1097
1098 ss := newShardedSearcher(1)
1099 searcher := searcherForTest(t, b)
1100 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1101
1102 q := query.NewOr(
1103 &query.Substring{Pattern: "one"},
1104 &query.Substring{Pattern: "three"})
1105
1106 opts := zoekt.SearchOptions{
1107 UseKeywordScoring: true,
1108 }
1109
1110 results, err := ss.Search(context.Background(), q, &opts)
1111 if err != nil {
1112 t.Fatal(err)
1113 }
1114
1115 var got []string
1116 for _, f := range results.Files {
1117 got = append(got, f.FileName)
1118 }
1119
1120 want := []string{"f3", "f1", "f2"}
1121 if !reflect.DeepEqual(got, want) {
1122 t.Errorf("got %v, want %v", got, want)
1123 }
1124}
1125
1126func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1127 ss := newShardedSearcher(1)
1128 searcher := searcherForTest(t, ib)
1129 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1130
1131 var files []zoekt.FileMatch
1132 sender := stream.SenderFunc(func(result *zoekt.SearchResult) {
1133 files = append(files, result.Files...)
1134 })
1135
1136 opts := zoekt.SearchOptions{}
1137 if useDocumentRanks {
1138 opts.UseDocumentRanks = true
1139 opts.FlushWallTime = 10 * time.Millisecond
1140 }
1141 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil {
1142 t.Fatal(err)
1143 }
1144 return files
1145}
1146
1147func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch {
1148 ss := newShardedSearcher(1)
1149 searcher := searcherForTest(t, ib)
1150 ss.replace(map[string]zoekt.Searcher{"r1": searcher})
1151
1152 opts := zoekt.SearchOptions{}
1153 if useDocumentRanks {
1154 opts.UseDocumentRanks = true
1155 opts.FlushWallTime = 50 * time.Millisecond
1156 }
1157 sres, _ := ss.Search(context.Background(), q, &opts)
1158 return sres.Files
1159}
1160
1161// Ensure we work on empty shard directories.
1162func TestNewDirectorySearcher_empty(t *testing.T) {
1163 ctx := context.Background()
1164
1165 test := func(t *testing.T, ss zoekt.Streamer) {
1166 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1167 if err != nil {
1168 t.Fatal("Search non-nil error", err)
1169 }
1170
1171 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" {
1172 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff)
1173 }
1174
1175 rl, err := ss.List(ctx, &query.Const{Value: true}, nil)
1176 if err != nil {
1177 t.Fatal("List non-nil error", err)
1178 }
1179 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" {
1180 t.Fatalf("List had non empty results (-want, +got):\n%s", diff)
1181 }
1182 }
1183
1184 dir := t.TempDir()
1185 t.Run("blocking", func(t *testing.T) {
1186 ss, err := NewDirectorySearcher(dir)
1187 if err != nil {
1188 t.Fatal(err)
1189 }
1190 t.Cleanup(ss.Close)
1191
1192 // We expect crashes to be empty as soon as NewDirectorySearcher returns
1193 // so we can validate straight away.
1194 test(t, ss)
1195 })
1196
1197 t.Run("fast", func(t *testing.T) {
1198 ss, err := NewDirectorySearcherFast(dir)
1199 if err != nil {
1200 t.Fatal(err)
1201 }
1202 t.Cleanup(ss.Close)
1203
1204 deadline := testDeadline(t, 10*time.Second)
1205
1206 // Wait for scanning of directory to be done. We should be returning
1207 // non-zero crashes until then.
1208 waitForPredicate(deadline, 10*time.Millisecond, func() bool {
1209 res, err := ss.Search(ctx, &query.Const{Value: true}, nil)
1210 if err != nil {
1211 t.Fatal(err)
1212 }
1213 return res.Stats.Crashes == 0
1214 })
1215
1216 test(t, ss)
1217 })
1218}
1219
1220// testDeadline returns the deadline for t, but ensures it is no longer than
1221// maxTimeout away.
1222func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time {
1223 deadline := time.Now().Add(maxTimeout)
1224 if d, ok := t.Deadline(); ok && d.Before(deadline) {
1225 // give 1s for us to do a final test run
1226 deadline = d.Add(-time.Second)
1227 }
1228 return deadline
1229}
1230
1231func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool {
1232 for time.Now().Before(deadline) {
1233 if pred() {
1234 return true
1235 }
1236
1237 time.Sleep(tick)
1238 }
1239
1240 return pred()
1241}