fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package shards 16 17import ( 18 "bytes" 19 "context" 20 "flag" 21 "fmt" 22 "hash/fnv" 23 "io" 24 "log" 25 "math" 26 "os" 27 "reflect" 28 "runtime" 29 "sort" 30 "strconv" 31 "testing" 32 "testing/quick" 33 "time" 34 35 "github.com/RoaringBitmap/roaring" 36 "github.com/google/go-cmp/cmp" 37 "github.com/google/go-cmp/cmp/cmpopts" 38 "github.com/grafana/regexp" 39 40 "github.com/sourcegraph/zoekt" 41 "github.com/sourcegraph/zoekt/query" 42) 43 44func TestMain(m *testing.M) { 45 flag.Parse() 46 if !testing.Verbose() { 47 log.SetOutput(io.Discard) 48 } 49 os.Exit(m.Run()) 50} 51 52type crashSearcher struct{} 53 54func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 55 panic("search") 56} 57 58func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 59 panic("list") 60} 61 62func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) { 63 return &zoekt.RepoStats{}, nil 64} 65 66func (s *crashSearcher) Close() {} 67 68func (s *crashSearcher) String() string { return "crashSearcher" } 69 70func TestCrashResilience(t *testing.T) { 71 out := &bytes.Buffer{} 72 oldOut := log.Writer() 73 log.SetOutput(out) 74 defer log.SetOutput(oldOut) 75 76 ss := newShardedSearcher(2) 77 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) 78 79 var wantCrashes int 80 test := func(t *testing.T) { 81 q := &query.Substring{Pattern: "hoi"} 82 opts := &zoekt.SearchOptions{} 83 if res, err := ss.Search(context.Background(), q, opts); err != nil { 84 t.Fatalf("Search: %v", err) 85 } else if res.Stats.Crashes != wantCrashes { 86 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes) 87 } 88 89 if res, err := ss.List(context.Background(), q, nil); err != nil { 90 t.Fatalf("List: %v", err) 91 } else if res.Crashes != wantCrashes { 92 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes) 93 } 94 } 95 96 // Before we are marked as ready we have one extra crash 97 wantCrashes = 2 98 t.Run("loading", test) 99 100 // After marking as ready we should only have the crashSearcher 101 // contributing. 102 ss.markReady() 103 wantCrashes = 1 104 t.Run("ready", test) 105} 106 107type rankSearcher struct { 108 rank uint16 109 repo *zoekt.Repository 110} 111 112func (s *rankSearcher) Close() { 113} 114 115func (s *rankSearcher) String() string { 116 return "" 117} 118 119func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 120 select { 121 case <-ctx.Done(): 122 return &zoekt.SearchResult{}, nil 123 default: 124 } 125 126 // Ugly, but without sleep it's too fast, and we can't 127 // simulate the cutoff. 128 time.Sleep(time.Millisecond) 129 return &zoekt.SearchResult{ 130 Files: []zoekt.FileMatch{ 131 { 132 FileName: fmt.Sprintf("f%d", s.rank), 133 Score: float64(s.rank), 134 }, 135 }, 136 Stats: zoekt.Stats{ 137 MatchCount: 1, 138 }, 139 }, nil 140} 141 142func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 143 r := zoekt.Repository{} 144 if s.repo != nil { 145 r = *s.repo 146 } 147 r.Rank = s.rank 148 return &zoekt.RepoList{ 149 Repos: []*zoekt.RepoListEntry{ 150 {Repository: r}, 151 }, 152 }, nil 153} 154 155func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo } 156 157func TestOrderByShard(t *testing.T) { 158 ss := newShardedSearcher(1) 159 160 n := 10 * runtime.GOMAXPROCS(0) 161 for i := 0; i < n; i++ { 162 ss.replace(map[string]zoekt.Searcher{ 163 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, 164 }) 165 } 166 167 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil { 168 t.Errorf("Search: %v", err) 169 } else if len(res.Files) != n { 170 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n) 171 } 172 173 opts := zoekt.SearchOptions{ 174 TotalMaxMatchCount: 3, 175 } 176 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts) 177 if err != nil { 178 t.Errorf("Search: %v", err) 179 } 180 181 if len(res.Files) < opts.TotalMaxMatchCount { 182 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount) 183 } 184 if len(res.Files) == n { 185 t.Errorf("got %d results, want < %d", len(res.Files), n) 186 } 187 for i, f := range res.Files { 188 rev := n - 1 - i 189 want := fmt.Sprintf("f%d", rev) 190 got := f.FileName 191 192 if got != want { 193 t.Logf("%d: got %q, want %q", i, got, want) 194 } 195 } 196} 197 198func TestShardedSearcher_Ranking(t *testing.T) { 199 ss := newShardedSearcher(1) 200 201 var nextShardNum int 202 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 203 r := &zoekt.Repository{ID: hash(repo), Name: repo} 204 r.RawConfig = map[string]string{ 205 "public": "1", 206 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 207 } 208 b := testIndexBuilder(t, r, docs...) 209 shard := searcherForTest(t, b) 210 ss.replace(map[string]zoekt.Searcher{ 211 fmt.Sprintf("key-%d", nextShardNum): shard, 212 }) 213 nextShardNum++ 214 } 215 216 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 217 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 218 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 219 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 220 221 want := []string{ 222 "super-star", 223 "moderately-popular", 224 "weekend-project", 225 "weekend-project-2", 226 } 227 228 var have []string 229 for _, s := range ss.getLoaded().shards { 230 for _, r := range s.repos { 231 have = append(have, r.Name) 232 } 233 } 234 235 if !reflect.DeepEqual(want, have) { 236 t.Fatalf("\nwant: %s\nhave: %s", want, have) 237 } 238} 239 240func TestShardedSearcher_DocumentRanking(t *testing.T) { 241 ss := newShardedSearcher(1) 242 243 var nextShardNum int 244 addShard := func(repo string, rank uint16, docs ...zoekt.Document) { 245 r := &zoekt.Repository{ID: hash(repo), Name: repo} 246 r.RawConfig = map[string]string{ 247 "public": "1", 248 } 249 r.Rank = rank 250 b := testIndexBuilder(t, r, docs...) 251 shard := searcherForTest(t, b) 252 ss.replace(map[string]zoekt.Searcher{ 253 fmt.Sprintf("key-%d", nextShardNum): shard, 254 }) 255 nextShardNum++ 256 } 257 258 addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 259 addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 260 addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 261 addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 262 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 263 264 // Run a stream search and gather the results 265 var results []*zoekt.SearchResult 266 opts := &zoekt.SearchOptions{ 267 FlushWallTime: 100 * time.Millisecond, 268 } 269 270 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts, 271 zoekt.SenderFunc(func(event *zoekt.SearchResult) { 272 results = append(results, event) 273 })) 274 if err != nil { 275 t.Fatal(err) 276 } 277 278 // There should always be two stream results, first progress-only, then the file results 279 if len(results) != 2 { 280 t.Fatalf("expected 2 streamed results, but got %d", len(results)) 281 } 282 283 // The ranking should be determined by whether it's an exact word match, 284 // followed by repository priority 285 want := []string{"f4", "f3", "f5", "f2", "f1"} 286 287 files := results[1].Files 288 got := make([]string, len(files)) 289 for i := 0; i < len(files); i++ { 290 got[i] = files[i].FileName 291 } 292 293 if !reflect.DeepEqual(got, want) { 294 t.Errorf("got %v, want %v", got, want) 295 } 296} 297 298func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { 299 ss := newShardedSearcher(1) 300 301 // namePrefix is so we can create a repo:foo filter and match the same set 302 // of repos. 303 namePrefix := [3]string{"foo", "bar", "baz"} 304 305 repoSetNames := []string{} 306 repoIDs := []uint32{} 307 n := 10 * runtime.GOMAXPROCS(0) 308 for i := 0; i < n; i++ { 309 shardName := fmt.Sprintf("shard%d", i) 310 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i) 311 repoID := hash(repoName) 312 313 if i%3 == 0 { 314 repoSetNames = append(repoSetNames, repoName) 315 repoIDs = append(repoIDs, repoID) 316 } 317 318 ss.replace(map[string]zoekt.Searcher{ 319 shardName: &rankSearcher{ 320 repo: &zoekt.Repository{ID: repoID, Name: repoName}, 321 rank: uint16(n - i), 322 }, 323 }) 324 } 325 326 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}) 327 if err != nil { 328 t.Errorf("Search: %v", err) 329 } 330 if len(res.Files) != n { 331 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 332 } 333 334 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{ 335 {Branch: "HEAD", Repos: roaring.New()}, 336 }} 337 338 for _, name := range repoSetNames { 339 branchesRepos.List[0].Repos.Add(hash(name)) 340 } 341 342 set := query.NewRepoSet(repoSetNames...) 343 sub := &query.Substring{Pattern: "bla"} 344 345 repoIDsQuery := query.NewRepoIDs(repoIDs...) 346 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")} 347 348 queries := []query.Q{ 349 query.NewAnd(set, sub), 350 // Test with the same reposet again 351 query.NewAnd(set, sub), 352 353 query.NewAnd(branchesRepos, sub), 354 // Test with the same repoBranches with IDs again 355 query.NewAnd(branchesRepos, sub), 356 357 query.NewAnd(repoIDsQuery, sub), 358 // Test with the same repoIDs again 359 query.NewAnd(repoIDsQuery, sub), 360 361 query.NewAnd(repoQuery, sub), 362 query.NewAnd(repoQuery, sub), 363 364 // List has queries which are just the reposet atoms. We also test twice. 365 set, 366 set, 367 branchesRepos, 368 branchesRepos, 369 repoIDsQuery, 370 repoIDsQuery, 371 repoQuery, 372 repoQuery, 373 } 374 375 for _, q := range queries { 376 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 377 if err != nil { 378 t.Errorf("Search(%s): %v", q, err) 379 } 380 // Note: Assertion is based on fact that `rankSearcher` always returns a 381 // result and using repoSet will half the number of results 382 if len(res.Files) != len(repoSetNames) { 383 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 384 } 385 } 386} 387 388func hash(name string) uint32 { 389 h := fnv.New32() 390 h.Write([]byte(name)) 391 return h.Sum32() 392} 393 394type memSeeker struct { 395 data []byte 396} 397 398func (s *memSeeker) Name() string { 399 return "memseeker" 400} 401 402func (s *memSeeker) Close() {} 403func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 404 return s.data[off : off+sz], nil 405} 406 407func (s *memSeeker) Size() (uint32, error) { 408 return uint32(len(s.data)), nil 409} 410 411func TestUnloadIndex(t *testing.T) { 412 b := testIndexBuilder(t, nil, zoekt.Document{ 413 Name: "filename", 414 Content: []byte("needle needle needle"), 415 }) 416 417 var buf bytes.Buffer 418 if err := b.Write(&buf); err != nil { 419 t.Fatal(err) 420 } 421 indexBytes := buf.Bytes() 422 indexFile := &memSeeker{indexBytes} 423 searcher, err := zoekt.NewSearcher(indexFile) 424 if err != nil { 425 t.Fatalf("NewSearcher: %v", err) 426 } 427 428 ss := newShardedSearcher(2) 429 ss.replace(map[string]zoekt.Searcher{"key": searcher}) 430 431 var opts zoekt.SearchOptions 432 q := &query.Substring{Pattern: "needle"} 433 res, err := ss.Search(context.Background(), q, &opts) 434 if err != nil { 435 t.Fatalf("Search(%s): %v", q, err) 436 } 437 438 forbidden := byte(29) 439 for i := range indexBytes { 440 // non-ASCII 441 indexBytes[i] = forbidden 442 } 443 444 for _, f := range res.Files { 445 if bytes.Contains(f.Content, []byte{forbidden}) { 446 t.Errorf("found %d in content %q", forbidden, f.Content) 447 } 448 if bytes.Contains(f.Checksum, []byte{forbidden}) { 449 t.Errorf("found %d in checksum %q", forbidden, f.Checksum) 450 } 451 452 for _, l := range f.LineMatches { 453 if bytes.Contains(l.Line, []byte{forbidden}) { 454 t.Errorf("found %d in line %q", forbidden, l.Line) 455 } 456 } 457 } 458} 459 460func TestShardedSearcher_List(t *testing.T) { 461 repos := []*zoekt.Repository{ 462 { 463 ID: 1234, 464 Name: "repo-a", 465 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 466 RawConfig: map[string]string{"repoid": "1234"}, 467 }, 468 { 469 Name: "repo-b", 470 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 471 }, 472 } 473 474 doc := zoekt.Document{ 475 Name: "foo.go", 476 Content: []byte("bar\nbaz"), 477 Branches: []string{"main", "dev"}, 478 } 479 480 // Test duplicate removal when ListOptions.Minimal is true and false 481 ss := newShardedSearcher(4) 482 ss.replace(map[string]zoekt.Searcher{ 483 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), 484 "2": searcherForTest(t, testIndexBuilder(t, repos[0])), 485 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), 486 "4": searcherForTest(t, testIndexBuilder(t, repos[1])), 487 }) 488 ss.markReady() 489 490 stats := zoekt.RepoStats{ 491 Shards: 2, 492 Documents: 1, 493 IndexBytes: 196, 494 ContentBytes: 13, 495 NewLinesCount: 1, 496 DefaultBranchNewLinesCount: 1, 497 OtherBranchesNewLinesCount: 1, 498 } 499 500 aggStats := stats 501 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works 502 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes. 503 504 for _, tc := range []struct { 505 name string 506 opts *zoekt.ListOptions 507 want *zoekt.RepoList 508 }{ 509 { 510 name: "nil opts", 511 opts: nil, 512 want: &zoekt.RepoList{ 513 Repos: []*zoekt.RepoListEntry{ 514 { 515 Repository: *repos[0], 516 Stats: stats, 517 }, 518 { 519 Repository: *repos[1], 520 Stats: stats, 521 }, 522 }, 523 Stats: aggStats, 524 }, 525 }, 526 { 527 name: "default", 528 opts: &zoekt.ListOptions{}, 529 want: &zoekt.RepoList{ 530 Repos: []*zoekt.RepoListEntry{ 531 { 532 Repository: *repos[0], 533 Stats: stats, 534 }, 535 { 536 Repository: *repos[1], 537 Stats: stats, 538 }, 539 }, 540 Stats: aggStats, 541 }, 542 }, 543 { 544 name: "field=repos", 545 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 546 want: &zoekt.RepoList{ 547 Repos: []*zoekt.RepoListEntry{ 548 { 549 Repository: *repos[0], 550 Stats: stats, 551 }, 552 { 553 Repository: *repos[1], 554 Stats: stats, 555 }, 556 }, 557 Stats: aggStats, 558 }, 559 }, 560 { 561 name: "field=reposmap", 562 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 563 want: &zoekt.RepoList{ 564 Repos: []*zoekt.RepoListEntry{ 565 { 566 Repository: *repos[1], 567 Stats: stats, 568 }, 569 }, 570 ReposMap: zoekt.ReposMap{ 571 repos[0].ID: { 572 HasSymbols: repos[0].HasSymbols, 573 Branches: repos[0].Branches, 574 }, 575 }, 576 Stats: aggStats, 577 }, 578 }, 579 } { 580 tc := tc 581 t.Run(tc.name, func(t *testing.T) { 582 t.Parallel() 583 584 q := &query.Repo{Regexp: regexp.MustCompile("repo")} 585 586 res, err := ss.List(context.Background(), q, tc.opts) 587 if err != nil { 588 t.Fatalf("List(%v, %s): %v", q, tc.opts, err) 589 } 590 591 sort.Slice(res.Repos, func(i, j int) bool { 592 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name 593 }) 594 595 ignored := []cmp.Option{ 596 cmpopts.EquateEmpty(), 597 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 598 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 599 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 600 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 601 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 602 } 603 604 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" { 605 t.Fatalf("mismatch (-want +got):\n%s", diff) 606 } 607 }) 608 } 609} 610 611func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { 612 b, err := zoekt.NewIndexBuilder(repo) 613 if err != nil { 614 t.Fatalf("NewIndexBuilder: %v", err) 615 } 616 617 for i, d := range docs { 618 if err := b.Add(d); err != nil { 619 t.Fatalf("Add %d: %v", i, err) 620 } 621 } 622 return b 623} 624 625func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { 626 var buf bytes.Buffer 627 if err := b.Write(&buf); err != nil { 628 t.Fatal(err) 629 } 630 f := &memSeeker{buf.Bytes()} 631 632 searcher, err := zoekt.NewSearcher(f) 633 if err != nil { 634 t.Fatalf("NewSearcher: %v", err) 635 } 636 637 return searcher 638} 639 640func reposForTest(n int) (result []*zoekt.Repository) { 641 for i := 0; i < n; i++ { 642 result = append(result, &zoekt.Repository{ 643 ID: uint32(i + 1), 644 Name: fmt.Sprintf("test-repository-%d", i), 645 }) 646 } 647 return result 648} 649 650func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 651 builder := testIndexBuilder(b, r) 652 653 if err := builder.Add(zoekt.Document{ 654 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 655 Content: []byte("needle needle needle haystack"), 656 }); err != nil { 657 b.Fatal(err) 658 } 659 660 for i := 1; i < numFiles; i++ { 661 if err := builder.Add(zoekt.Document{ 662 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 663 Content: []byte("haystack haystack haystack"), 664 }); err != nil { 665 b.Fatal(err) 666 } 667 } 668 669 return searcherForTest(b, builder) 670} 671 672func BenchmarkShardedSearch(b *testing.B) { 673 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0))) 674 675 filesPerRepo := 300 676 repos := reposForTest(3000) 677 var repoSetIDs []uint32 678 679 shards := make(map[string]zoekt.Searcher, len(repos)) 680 for i, r := range repos { 681 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo) 682 if i%2 == 0 { 683 repoSetIDs = append(repoSetIDs, r.ID) 684 } 685 } 686 687 ss.replace(shards) 688 689 ctx := context.Background() 690 opts := &zoekt.SearchOptions{} 691 692 needleSub := &query.Substring{Pattern: "needle"} 693 haystackSub := &query.Substring{Pattern: "haystack"} 694 helloworldSub := &query.Substring{Pattern: "helloworld"} 695 haystackCap, err := query.Parse("hay(s(t))(a)ck") 696 if err != nil { 697 b.Fatal(err) 698 } 699 700 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck") 701 if err != nil { 702 b.Fatal(err) 703 } 704 705 setAnd := func(q query.Q) func() query.Q { 706 return func() query.Q { 707 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q) 708 } 709 } 710 711 search := func(b *testing.B, q query.Q, wantFiles int) { 712 b.Helper() 713 714 res, err := ss.Search(ctx, q, opts) 715 if err != nil { 716 b.Fatalf("Search(%s): %v", q, err) 717 } 718 if have := len(res.Files); have != wantFiles { 719 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles) 720 } 721 } 722 723 benchmarks := []struct { 724 name string 725 q func() query.Q 726 wantFiles int 727 }{ 728 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, 729 {"substring no results", func() query.Q { return helloworldSub }, 0}, 730 {"substring some results", func() query.Q { return needleSub }, len(repos)}, 731 732 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo}, 733 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo}, 734 735 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo}, 736 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)}, 737 {"substring no results and repo set", setAnd(helloworldSub), 0}, 738 } 739 740 for _, bb := range benchmarks { 741 b.Run(bb.name, func(b *testing.B) { 742 b.ReportAllocs() 743 744 for n := 0; n < b.N; n++ { 745 q := bb.q() 746 747 search(b, q, bb.wantFiles) 748 } 749 }) 750 } 751} 752 753func TestRawQuerySearch(t *testing.T) { 754 ss := newShardedSearcher(1) 755 756 var nextShardNum int 757 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { 758 r := &zoekt.Repository{Name: repo} 759 r.RawConfig = rawConfig 760 b := testIndexBuilder(t, r, docs...) 761 shard := searcherForTest(t, b) 762 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 763 nextShardNum++ 764 } 765 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 766 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 767 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 768 769 cases := []struct { 770 pattern string 771 flags query.RawConfig 772 wantRepos []string 773 wantFiles int 774 }{ 775 { 776 pattern: "bas", 777 flags: query.RcOnlyPublic, 778 wantRepos: []string{"public"}, 779 wantFiles: 1, 780 }, 781 { 782 pattern: "foo", 783 flags: query.RcOnlyPublic, 784 wantRepos: []string{"public", "public_fork"}, 785 wantFiles: 2, 786 }, 787 { 788 pattern: "foo", 789 flags: query.RcOnlyPublic | query.RcNoForks, 790 wantRepos: []string{"public"}, 791 wantFiles: 1, 792 }, 793 { 794 pattern: "bar", 795 flags: query.RcOnlyForks, 796 wantRepos: []string{"public_fork"}, 797 wantFiles: 1, 798 }, 799 { 800 pattern: "bas", 801 flags: query.RcNoArchived, 802 wantRepos: []string{"public"}, 803 wantFiles: 1, 804 }, 805 { 806 pattern: "foo", 807 flags: query.RcNoForks, 808 wantRepos: []string{"public", "private_archived"}, 809 wantFiles: 2, 810 }, 811 { 812 pattern: "bas", 813 flags: query.RcOnlyArchived, 814 wantRepos: []string{"private_archived"}, 815 wantFiles: 1, 816 }, 817 { 818 pattern: "foo", 819 flags: query.RcOnlyPrivate, 820 wantRepos: []string{"private_archived"}, 821 wantFiles: 1, 822 }, 823 { 824 pattern: "foo", 825 flags: query.RcOnlyPrivate | query.RcNoArchived, 826 wantRepos: []string{}, 827 wantFiles: 0, 828 }, 829 } 830 for _, c := range cases { 831 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) { 832 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags) 833 834 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 835 if err != nil { 836 t.Fatal(err) 837 } 838 839 if got := len(sr.Files); got != c.wantFiles { 840 t.Fatalf("wanted %d, got %d", c.wantFiles, got) 841 } 842 843 if c.wantFiles == 0 { 844 return 845 } 846 847 gotRepos := make([]string, 0, len(sr.RepoURLs)) 848 for k := range sr.RepoURLs { 849 gotRepos = append(gotRepos, k) 850 } 851 sort.Strings(gotRepos) 852 sort.Strings(c.wantRepos) 853 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" { 854 t.Fatalf("(-want, +got):\n%s", d) 855 } 856 }) 857 } 858} 859 860func TestPrioritySlice(t *testing.T) { 861 p := &prioritySlice{} 862 for step, oper := range []struct { 863 isAppend bool 864 value float64 865 expectedMax float64 866 }{ 867 {true, 1, 1}, 868 {true, 3, 3}, 869 {true, 2, 3}, 870 {false, 1, 3}, 871 {false, 3, 2}, 872 {false, 2, math.Inf(-1)}, 873 } { 874 if oper.isAppend { 875 p.append(oper.value) 876 } else { 877 p.remove(oper.value) 878 } 879 max := p.max() 880 if max != oper.expectedMax { 881 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax) 882 } 883 } 884} 885 886func TestSendByRepository(t *testing.T) { 887 wantStats := zoekt.Stats{} 888 wantStats.ShardsScanned = 1 889 890 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this 891 // test. 892 f := func(n1, n2, n3 uint8) bool { 893 sr := createMockSearchResult(n1, n2, n3, wantStats) 894 895 mock := &mockSender{} 896 sendByRepository(sr, &zoekt.SearchOptions{}, mock) 897 898 if diff := cmp.Diff(wantStats, mock.stats); diff != "" { 899 t.Logf("-want,+got\n%s", diff) 900 return false 901 } 902 903 nonZero := 0 904 for _, l := range []uint8{n1, n2, n3} { 905 if l > 0 { 906 nonZero++ 907 } 908 } 909 if l := len(mock.files); l != nonZero { 910 t.Logf("wanted results from %d repositores, got %d", nonZero, l) 911 return false 912 } 913 914 gotTotal := 0 915 for _, fs := range mock.files { 916 gotTotal += len(fs) 917 } 918 wantTotal := int(n1) + int(n2) + int(n3) 919 if gotTotal != wantTotal { 920 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal) 921 return false 922 } 923 924 for _, fs := range mock.files { 925 if len(fs) == 0 { 926 t.Logf("got search result with 0 file matches after split") 927 return false 928 } 929 } 930 return true 931 } 932 933 if err := quick.Check(f, nil); err != nil { 934 t.Error(err) 935 } 936} 937 938type mockSender struct { 939 stats zoekt.Stats 940 files [][]zoekt.FileMatch 941} 942 943func (s *mockSender) Send(sr *zoekt.SearchResult) { 944 s.stats.Add(sr.Stats) 945 if len(sr.Files) == 0 { 946 return 947 } 948 s.files = append(s.files, sr.Files) 949} 950 951func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult { 952 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)} 953 for i, n := range []uint8{n1, n2, n3} { 954 if n == 0 { 955 continue 956 } 957 tmp := mkSearchResult(int(n), uint32(i)) 958 sr.Files = append(sr.Files, tmp.Files...) 959 for k := range tmp.RepoURLs { 960 sr.RepoURLs[k] = "" 961 } 962 } 963 sr.Stats = stats 964 return sr 965} 966 967func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { 968 if n == 0 { 969 return &zoekt.SearchResult{} 970 } 971 fm := make([]zoekt.FileMatch, 0, n) 972 for i := 0; i < n; i++ { 973 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) 974 } 975 976 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}} 977} 978 979func TestFileBasedSearch(t *testing.T) { 980 cases := []struct { 981 name string 982 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 983 }{ 984 {"Search", testShardedSearch}, 985 {"StreamSearch", testShardedStreamSearch}, 986 } 987 988 c1 := []byte("I love bananas without skin") 989 // -----------0123456789012345678901234567890123456789 990 c2 := []byte("In Dutch, ananas means pineapple") 991 // -----------0123456789012345678901234567890123456789 992 b := testIndexBuilder(t, nil, 993 zoekt.Document{Name: "f1", Content: c1}, 994 zoekt.Document{Name: "f2", Content: c2}, 995 ) 996 997 for _, tt := range cases { 998 for _, useDocumentRanks := range []bool{false, true} { 999 t.Run(tt.name, func(t *testing.T) { 1000 matches := tt.testShardedSearch(t, &query.Substring{ 1001 CaseSensitive: false, 1002 Pattern: "ananas", 1003 }, b, useDocumentRanks) 1004 1005 if len(matches) != 2 { 1006 t.Fatalf("got %v, want 2 matches", matches) 1007 } 1008 if matches[0].FileName != "f2" || matches[1].FileName != "f1" { 1009 t.Fatalf("got %v, want matches {f1,f2}", matches) 1010 } 1011 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 { 1012 t.Fatalf("got %#v, want offsets 10,8", matches) 1013 } 1014 }) 1015 } 1016 } 1017} 1018 1019func TestWordBoundaryRanking(t *testing.T) { 1020 cases := []struct { 1021 name string 1022 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1023 }{ 1024 {"Search", testShardedSearch}, 1025 {"StreamSearch", testShardedStreamSearch}, 1026 } 1027 1028 b := testIndexBuilder(t, nil, 1029 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1030 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1031 // -----------------------------------------0123456 789012 34567890 1032 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1033 1034 for _, tt := range cases { 1035 for _, useDocumentRanks := range []bool{false, true} { 1036 t.Run(tt.name, func(t *testing.T) { 1037 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks) 1038 1039 if len(files) != 3 { 1040 t.Fatalf("got %#v, want 3 files", files) 1041 } 1042 1043 file0 := files[0] 1044 if file0.FileName != "f2" || len(file0.LineMatches) != 3 { 1045 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0) 1046 } 1047 1048 if file0.LineMatches[0].LineFragments[0].Offset != 13 { 1049 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0]) 1050 } 1051 if file0.LineMatches[1].LineFragments[0].Offset != 7 { 1052 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1]) 1053 } 1054 }) 1055 } 1056 } 1057} 1058 1059func TestAtomCountScore(t *testing.T) { 1060 cases := []struct { 1061 name string 1062 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1063 }{ 1064 {"Search", testShardedSearch}, 1065 {"StreamSearch", testShardedStreamSearch}, 1066 } 1067 1068 b := testIndexBuilder(t, 1069 &zoekt.Repository{ 1070 Branches: []zoekt.RepositoryBranch{ 1071 {Name: "branches", Version: "v1"}, 1072 {Name: "needle", Version: "v2"}, 1073 }, 1074 }, 1075 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1076 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1077 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1078 1079 for _, tt := range cases { 1080 for _, useDocumentRanks := range []bool{false, true} { 1081 t.Run(tt.name, func(t *testing.T) { 1082 files := tt.testShardedSearch(t, 1083 query.NewOr( 1084 &query.Substring{Pattern: "needle"}, 1085 &query.Substring{Pattern: "needle", FileName: true}, 1086 &query.Branch{Pattern: "needle"}, 1087 ), b, useDocumentRanks) 1088 var got []string 1089 for _, f := range files { 1090 got = append(got, f.FileName) 1091 } 1092 want := []string{"needle-file-branch", "needle-file", "f1"} 1093 if !reflect.DeepEqual(got, want) { 1094 t.Errorf("got %v, want %v", got, want) 1095 } 1096 }) 1097 } 1098 } 1099} 1100 1101func TestUseBM25Scoring(t *testing.T) { 1102 b := testIndexBuilder(t, 1103 &zoekt.Repository{}, 1104 zoekt.Document{Name: "f1", Content: []byte("one two two three")}, 1105 zoekt.Document{Name: "f2", Content: []byte("one two one two")}, 1106 zoekt.Document{Name: "f3", Content: []byte("one three three three")}) 1107 1108 ss := newShardedSearcher(1) 1109 searcher := searcherForTest(t, b) 1110 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1111 1112 q := query.NewOr( 1113 &query.Substring{Pattern: "one"}, 1114 &query.Substring{Pattern: "three"}) 1115 1116 opts := zoekt.SearchOptions{ 1117 UseBM25Scoring: true, 1118 } 1119 1120 results, err := ss.Search(context.Background(), q, &opts) 1121 if err != nil { 1122 t.Fatal(err) 1123 } 1124 1125 var got []string 1126 for _, f := range results.Files { 1127 got = append(got, f.FileName) 1128 } 1129 1130 want := []string{"f3", "f1", "f2"} 1131 if !reflect.DeepEqual(got, want) { 1132 t.Errorf("got %v, want %v", got, want) 1133 } 1134} 1135 1136func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1137 ss := newShardedSearcher(1) 1138 searcher := searcherForTest(t, ib) 1139 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1140 1141 var files []zoekt.FileMatch 1142 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) { 1143 files = append(files, result.Files...) 1144 }) 1145 1146 opts := zoekt.SearchOptions{} 1147 if useDocumentRanks { 1148 opts.FlushWallTime = 10 * time.Millisecond 1149 } 1150 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil { 1151 t.Fatal(err) 1152 } 1153 return files 1154} 1155 1156func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1157 ss := newShardedSearcher(1) 1158 searcher := searcherForTest(t, ib) 1159 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1160 1161 opts := zoekt.SearchOptions{} 1162 if useDocumentRanks { 1163 opts.FlushWallTime = 50 * time.Millisecond 1164 } 1165 sres, _ := ss.Search(context.Background(), q, &opts) 1166 return sres.Files 1167} 1168 1169// Ensure we work on empty shard directories. 1170func TestNewDirectorySearcher_empty(t *testing.T) { 1171 ctx := context.Background() 1172 1173 test := func(t *testing.T, ss zoekt.Streamer) { 1174 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1175 if err != nil { 1176 t.Fatal("Search non-nil error", err) 1177 } 1178 1179 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" { 1180 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff) 1181 } 1182 1183 rl, err := ss.List(ctx, &query.Const{Value: true}, nil) 1184 if err != nil { 1185 t.Fatal("List non-nil error", err) 1186 } 1187 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" { 1188 t.Fatalf("List had non empty results (-want, +got):\n%s", diff) 1189 } 1190 } 1191 1192 dir := t.TempDir() 1193 t.Run("blocking", func(t *testing.T) { 1194 ss, err := NewDirectorySearcher(dir) 1195 if err != nil { 1196 t.Fatal(err) 1197 } 1198 t.Cleanup(ss.Close) 1199 1200 // We expect crashes to be empty as soon as NewDirectorySearcher returns 1201 // so we can validate straight away. 1202 test(t, ss) 1203 }) 1204 1205 t.Run("fast", func(t *testing.T) { 1206 ss, err := NewDirectorySearcherFast(dir) 1207 if err != nil { 1208 t.Fatal(err) 1209 } 1210 t.Cleanup(ss.Close) 1211 1212 deadline := testDeadline(t, 10*time.Second) 1213 1214 // Wait for scanning of directory to be done. We should be returning 1215 // non-zero crashes until then. 1216 waitForPredicate(deadline, 10*time.Millisecond, func() bool { 1217 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1218 if err != nil { 1219 t.Fatal(err) 1220 } 1221 return res.Stats.Crashes == 0 1222 }) 1223 1224 test(t, ss) 1225 }) 1226} 1227 1228// testDeadline returns the deadline for t, but ensures it is no longer than 1229// maxTimeout away. 1230func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time { 1231 deadline := time.Now().Add(maxTimeout) 1232 if d, ok := t.Deadline(); ok && d.Before(deadline) { 1233 // give 1s for us to do a final test run 1234 deadline = d.Add(-time.Second) 1235 } 1236 return deadline 1237} 1238 1239func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool { 1240 for time.Now().Before(deadline) { 1241 if pred() { 1242 return true 1243 } 1244 1245 time.Sleep(tick) 1246 } 1247 1248 return pred() 1249}