fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package shards 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "hash/fnv" 22 "log" 23 "math" 24 "os" 25 "reflect" 26 "runtime" 27 "sort" 28 "strconv" 29 "testing" 30 "testing/quick" 31 "time" 32 33 "github.com/RoaringBitmap/roaring" 34 "github.com/google/go-cmp/cmp" 35 "github.com/google/go-cmp/cmp/cmpopts" 36 "github.com/grafana/regexp" 37 38 "github.com/sourcegraph/zoekt" 39 "github.com/sourcegraph/zoekt/query" 40) 41 42type crashSearcher struct{} 43 44func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 45 panic("search") 46} 47 48func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 49 panic("list") 50} 51 52func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) { 53 return &zoekt.RepoStats{}, nil 54} 55 56func (s *crashSearcher) Close() {} 57 58func (s *crashSearcher) String() string { return "crashSearcher" } 59 60func TestCrashResilience(t *testing.T) { 61 out := &bytes.Buffer{} 62 log.SetOutput(out) 63 defer log.SetOutput(os.Stderr) 64 ss := newShardedSearcher(2) 65 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) 66 67 var wantCrashes int 68 test := func(t *testing.T) { 69 q := &query.Substring{Pattern: "hoi"} 70 opts := &zoekt.SearchOptions{} 71 if res, err := ss.Search(context.Background(), q, opts); err != nil { 72 t.Fatalf("Search: %v", err) 73 } else if res.Stats.Crashes != wantCrashes { 74 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes) 75 } 76 77 if res, err := ss.List(context.Background(), q, nil); err != nil { 78 t.Fatalf("List: %v", err) 79 } else if res.Crashes != wantCrashes { 80 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes) 81 } 82 } 83 84 // Before we are marked as ready we have one extra crash 85 wantCrashes = 2 86 t.Run("loading", test) 87 88 // After marking as ready we should only have the crashSearcher 89 // contributing. 90 ss.markReady() 91 wantCrashes = 1 92 t.Run("ready", test) 93} 94 95type rankSearcher struct { 96 rank uint16 97 repo *zoekt.Repository 98} 99 100func (s *rankSearcher) Close() { 101} 102 103func (s *rankSearcher) String() string { 104 return "" 105} 106 107func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 108 select { 109 case <-ctx.Done(): 110 return &zoekt.SearchResult{}, nil 111 default: 112 } 113 114 // Ugly, but without sleep it's too fast, and we can't 115 // simulate the cutoff. 116 time.Sleep(time.Millisecond) 117 return &zoekt.SearchResult{ 118 Files: []zoekt.FileMatch{ 119 { 120 FileName: fmt.Sprintf("f%d", s.rank), 121 Score: float64(s.rank), 122 }, 123 }, 124 Stats: zoekt.Stats{ 125 MatchCount: 1, 126 }, 127 }, nil 128} 129 130func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 131 r := zoekt.Repository{} 132 if s.repo != nil { 133 r = *s.repo 134 } 135 r.Rank = s.rank 136 return &zoekt.RepoList{ 137 Repos: []*zoekt.RepoListEntry{ 138 {Repository: r}, 139 }, 140 }, nil 141} 142 143func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo } 144 145func TestOrderByShard(t *testing.T) { 146 ss := newShardedSearcher(1) 147 148 n := 10 * runtime.GOMAXPROCS(0) 149 for i := 0; i < n; i++ { 150 ss.replace(map[string]zoekt.Searcher{ 151 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, 152 }) 153 } 154 155 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil { 156 t.Errorf("Search: %v", err) 157 } else if len(res.Files) != n { 158 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n) 159 } 160 161 opts := zoekt.SearchOptions{ 162 TotalMaxMatchCount: 3, 163 } 164 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts) 165 if err != nil { 166 t.Errorf("Search: %v", err) 167 } 168 169 if len(res.Files) < opts.TotalMaxMatchCount { 170 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount) 171 } 172 if len(res.Files) == n { 173 t.Errorf("got %d results, want < %d", len(res.Files), n) 174 } 175 for i, f := range res.Files { 176 rev := n - 1 - i 177 want := fmt.Sprintf("f%d", rev) 178 got := f.FileName 179 180 if got != want { 181 t.Logf("%d: got %q, want %q", i, got, want) 182 } 183 } 184} 185 186func TestShardedSearcher_Ranking(t *testing.T) { 187 ss := newShardedSearcher(1) 188 189 var nextShardNum int 190 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 191 r := &zoekt.Repository{ID: hash(repo), Name: repo} 192 r.RawConfig = map[string]string{ 193 "public": "1", 194 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 195 } 196 b := testIndexBuilder(t, r, docs...) 197 shard := searcherForTest(t, b) 198 ss.replace(map[string]zoekt.Searcher{ 199 fmt.Sprintf("key-%d", nextShardNum): shard, 200 }) 201 nextShardNum++ 202 } 203 204 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 205 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 206 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 207 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 208 209 want := []string{ 210 "super-star", 211 "moderately-popular", 212 "weekend-project", 213 "weekend-project-2", 214 } 215 216 var have []string 217 for _, s := range ss.getLoaded().shards { 218 for _, r := range s.repos { 219 have = append(have, r.Name) 220 } 221 } 222 223 if !reflect.DeepEqual(want, have) { 224 t.Fatalf("\nwant: %s\nhave: %s", want, have) 225 } 226} 227 228func TestShardedSearcher_DocumentRanking(t *testing.T) { 229 ss := newShardedSearcher(1) 230 231 var nextShardNum int 232 addShard := func(repo string, rank uint16, docs ...zoekt.Document) { 233 r := &zoekt.Repository{ID: hash(repo), Name: repo} 234 r.RawConfig = map[string]string{ 235 "public": "1", 236 } 237 r.Rank = rank 238 b := testIndexBuilder(t, r, docs...) 239 shard := searcherForTest(t, b) 240 ss.replace(map[string]zoekt.Searcher{ 241 fmt.Sprintf("key-%d", nextShardNum): shard, 242 }) 243 nextShardNum++ 244 } 245 246 addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 247 addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 248 addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 249 addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 250 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 251 252 // Run a stream search and gather the results 253 var results []*zoekt.SearchResult 254 opts := &zoekt.SearchOptions{ 255 UseDocumentRanks: true, 256 FlushWallTime: 100 * time.Millisecond, 257 } 258 259 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts, 260 zoekt.SenderFunc(func(event *zoekt.SearchResult) { 261 results = append(results, event) 262 })) 263 if err != nil { 264 t.Fatal(err) 265 } 266 267 // There should always be two stream results, first progress-only, then the file results 268 if len(results) != 2 { 269 t.Fatalf("expected 2 streamed results, but got %d", len(results)) 270 } 271 272 // The ranking should be determined by whether it's an exact word match, 273 // followed by repository priority 274 want := []string{"f4", "f3", "f5", "f2", "f1"} 275 276 files := results[1].Files 277 got := make([]string, len(files)) 278 for i := 0; i < len(files); i++ { 279 got[i] = files[i].FileName 280 } 281 282 if !reflect.DeepEqual(got, want) { 283 t.Errorf("got %v, want %v", got, want) 284 } 285} 286 287func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { 288 ss := newShardedSearcher(1) 289 290 // namePrefix is so we can create a repo:foo filter and match the same set 291 // of repos. 292 namePrefix := [3]string{"foo", "bar", "baz"} 293 294 repoSetNames := []string{} 295 repoIDs := []uint32{} 296 n := 10 * runtime.GOMAXPROCS(0) 297 for i := 0; i < n; i++ { 298 shardName := fmt.Sprintf("shard%d", i) 299 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i) 300 repoID := hash(repoName) 301 302 if i%3 == 0 { 303 repoSetNames = append(repoSetNames, repoName) 304 repoIDs = append(repoIDs, repoID) 305 } 306 307 ss.replace(map[string]zoekt.Searcher{ 308 shardName: &rankSearcher{ 309 repo: &zoekt.Repository{ID: repoID, Name: repoName}, 310 rank: uint16(n - i), 311 }, 312 }) 313 } 314 315 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}) 316 if err != nil { 317 t.Errorf("Search: %v", err) 318 } 319 if len(res.Files) != n { 320 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 321 } 322 323 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{ 324 {Branch: "HEAD", Repos: roaring.New()}, 325 }} 326 327 for _, name := range repoSetNames { 328 branchesRepos.List[0].Repos.Add(hash(name)) 329 } 330 331 set := query.NewRepoSet(repoSetNames...) 332 sub := &query.Substring{Pattern: "bla"} 333 334 repoIDsQuery := query.NewRepoIDs(repoIDs...) 335 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")} 336 337 queries := []query.Q{ 338 query.NewAnd(set, sub), 339 // Test with the same reposet again 340 query.NewAnd(set, sub), 341 342 query.NewAnd(branchesRepos, sub), 343 // Test with the same repoBranches with IDs again 344 query.NewAnd(branchesRepos, sub), 345 346 query.NewAnd(repoIDsQuery, sub), 347 // Test with the same repoIDs again 348 query.NewAnd(repoIDsQuery, sub), 349 350 query.NewAnd(repoQuery, sub), 351 query.NewAnd(repoQuery, sub), 352 353 // List has queries which are just the reposet atoms. We also test twice. 354 set, 355 set, 356 branchesRepos, 357 branchesRepos, 358 repoIDsQuery, 359 repoIDsQuery, 360 repoQuery, 361 repoQuery, 362 } 363 364 for _, q := range queries { 365 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 366 if err != nil { 367 t.Errorf("Search(%s): %v", q, err) 368 } 369 // Note: Assertion is based on fact that `rankSearcher` always returns a 370 // result and using repoSet will half the number of results 371 if len(res.Files) != len(repoSetNames) { 372 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 373 } 374 } 375} 376 377func hash(name string) uint32 { 378 h := fnv.New32() 379 h.Write([]byte(name)) 380 return h.Sum32() 381} 382 383type memSeeker struct { 384 data []byte 385} 386 387func (s *memSeeker) Name() string { 388 return "memseeker" 389} 390 391func (s *memSeeker) Close() {} 392func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 393 return s.data[off : off+sz], nil 394} 395 396func (s *memSeeker) Size() (uint32, error) { 397 return uint32(len(s.data)), nil 398} 399 400func TestUnloadIndex(t *testing.T) { 401 b := testIndexBuilder(t, nil, zoekt.Document{ 402 Name: "filename", 403 Content: []byte("needle needle needle"), 404 }) 405 406 var buf bytes.Buffer 407 if err := b.Write(&buf); err != nil { 408 t.Fatal(err) 409 } 410 indexBytes := buf.Bytes() 411 indexFile := &memSeeker{indexBytes} 412 searcher, err := zoekt.NewSearcher(indexFile) 413 if err != nil { 414 t.Fatalf("NewSearcher: %v", err) 415 } 416 417 ss := newShardedSearcher(2) 418 ss.replace(map[string]zoekt.Searcher{"key": searcher}) 419 420 var opts zoekt.SearchOptions 421 q := &query.Substring{Pattern: "needle"} 422 res, err := ss.Search(context.Background(), q, &opts) 423 if err != nil { 424 t.Fatalf("Search(%s): %v", q, err) 425 } 426 427 forbidden := byte(29) 428 for i := range indexBytes { 429 // non-ASCII 430 indexBytes[i] = forbidden 431 } 432 433 for _, f := range res.Files { 434 if bytes.Contains(f.Content, []byte{forbidden}) { 435 t.Errorf("found %d in content %q", forbidden, f.Content) 436 } 437 if bytes.Contains(f.Checksum, []byte{forbidden}) { 438 t.Errorf("found %d in checksum %q", forbidden, f.Checksum) 439 } 440 441 for _, l := range f.LineMatches { 442 if bytes.Contains(l.Line, []byte{forbidden}) { 443 t.Errorf("found %d in line %q", forbidden, l.Line) 444 } 445 } 446 } 447} 448 449func TestShardedSearcher_List(t *testing.T) { 450 repos := []*zoekt.Repository{ 451 { 452 ID: 1234, 453 Name: "repo-a", 454 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 455 RawConfig: map[string]string{"repoid": "1234"}, 456 }, 457 { 458 Name: "repo-b", 459 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 460 }, 461 } 462 463 doc := zoekt.Document{ 464 Name: "foo.go", 465 Content: []byte("bar\nbaz"), 466 Branches: []string{"main", "dev"}, 467 } 468 469 // Test duplicate removal when ListOptions.Minimal is true and false 470 ss := newShardedSearcher(4) 471 ss.replace(map[string]zoekt.Searcher{ 472 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), 473 "2": searcherForTest(t, testIndexBuilder(t, repos[0])), 474 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), 475 "4": searcherForTest(t, testIndexBuilder(t, repos[1])), 476 }) 477 ss.markReady() 478 479 stats := zoekt.RepoStats{ 480 Shards: 2, 481 Documents: 1, 482 IndexBytes: 196, 483 ContentBytes: 13, 484 NewLinesCount: 1, 485 DefaultBranchNewLinesCount: 1, 486 OtherBranchesNewLinesCount: 1, 487 } 488 489 aggStats := stats 490 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works 491 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes. 492 493 for _, tc := range []struct { 494 name string 495 opts *zoekt.ListOptions 496 want *zoekt.RepoList 497 }{ 498 { 499 name: "nil opts", 500 opts: nil, 501 want: &zoekt.RepoList{ 502 Repos: []*zoekt.RepoListEntry{ 503 { 504 Repository: *repos[0], 505 Stats: stats, 506 }, 507 { 508 Repository: *repos[1], 509 Stats: stats, 510 }, 511 }, 512 Stats: aggStats, 513 }, 514 }, 515 { 516 name: "default", 517 opts: &zoekt.ListOptions{}, 518 want: &zoekt.RepoList{ 519 Repos: []*zoekt.RepoListEntry{ 520 { 521 Repository: *repos[0], 522 Stats: stats, 523 }, 524 { 525 Repository: *repos[1], 526 Stats: stats, 527 }, 528 }, 529 Stats: aggStats, 530 }, 531 }, 532 { 533 name: "field=repos", 534 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 535 want: &zoekt.RepoList{ 536 Repos: []*zoekt.RepoListEntry{ 537 { 538 Repository: *repos[0], 539 Stats: stats, 540 }, 541 { 542 Repository: *repos[1], 543 Stats: stats, 544 }, 545 }, 546 Stats: aggStats, 547 }, 548 }, 549 { 550 name: "field=reposmap", 551 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 552 want: &zoekt.RepoList{ 553 Repos: []*zoekt.RepoListEntry{ 554 { 555 Repository: *repos[1], 556 Stats: stats, 557 }, 558 }, 559 ReposMap: zoekt.ReposMap{ 560 repos[0].ID: { 561 HasSymbols: repos[0].HasSymbols, 562 Branches: repos[0].Branches, 563 }, 564 }, 565 Stats: aggStats, 566 }, 567 }, 568 } { 569 tc := tc 570 t.Run(tc.name, func(t *testing.T) { 571 t.Parallel() 572 573 q := &query.Repo{Regexp: regexp.MustCompile("repo")} 574 575 res, err := ss.List(context.Background(), q, tc.opts) 576 if err != nil { 577 t.Fatalf("List(%v, %s): %v", q, tc.opts, err) 578 } 579 580 sort.Slice(res.Repos, func(i, j int) bool { 581 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name 582 }) 583 584 ignored := []cmp.Option{ 585 cmpopts.EquateEmpty(), 586 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 587 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 588 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 589 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 590 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 591 } 592 593 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" { 594 t.Fatalf("mismatch (-want +got):\n%s", diff) 595 } 596 }) 597 } 598} 599 600func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { 601 b, err := zoekt.NewIndexBuilder(repo) 602 if err != nil { 603 t.Fatalf("NewIndexBuilder: %v", err) 604 } 605 606 for i, d := range docs { 607 if err := b.Add(d); err != nil { 608 t.Fatalf("Add %d: %v", i, err) 609 } 610 } 611 return b 612} 613 614func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { 615 var buf bytes.Buffer 616 if err := b.Write(&buf); err != nil { 617 t.Fatal(err) 618 } 619 f := &memSeeker{buf.Bytes()} 620 621 searcher, err := zoekt.NewSearcher(f) 622 if err != nil { 623 t.Fatalf("NewSearcher: %v", err) 624 } 625 626 return searcher 627} 628 629func reposForTest(n int) (result []*zoekt.Repository) { 630 for i := 0; i < n; i++ { 631 result = append(result, &zoekt.Repository{ 632 ID: uint32(i + 1), 633 Name: fmt.Sprintf("test-repository-%d", i), 634 }) 635 } 636 return result 637} 638 639func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 640 builder := testIndexBuilder(b, r) 641 642 if err := builder.Add(zoekt.Document{ 643 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 644 Content: []byte("needle needle needle haystack"), 645 }); err != nil { 646 b.Fatal(err) 647 } 648 649 for i := 1; i < numFiles; i++ { 650 if err := builder.Add(zoekt.Document{ 651 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 652 Content: []byte("haystack haystack haystack"), 653 }); err != nil { 654 b.Fatal(err) 655 } 656 } 657 658 return searcherForTest(b, builder) 659} 660 661func BenchmarkShardedSearch(b *testing.B) { 662 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0))) 663 664 filesPerRepo := 300 665 repos := reposForTest(3000) 666 var repoSetIDs []uint32 667 668 shards := make(map[string]zoekt.Searcher, len(repos)) 669 for i, r := range repos { 670 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo) 671 if i%2 == 0 { 672 repoSetIDs = append(repoSetIDs, r.ID) 673 } 674 } 675 676 ss.replace(shards) 677 678 ctx := context.Background() 679 opts := &zoekt.SearchOptions{} 680 681 needleSub := &query.Substring{Pattern: "needle"} 682 haystackSub := &query.Substring{Pattern: "haystack"} 683 helloworldSub := &query.Substring{Pattern: "helloworld"} 684 haystackCap, err := query.Parse("hay(s(t))(a)ck") 685 if err != nil { 686 b.Fatal(err) 687 } 688 689 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck") 690 if err != nil { 691 b.Fatal(err) 692 } 693 694 setAnd := func(q query.Q) func() query.Q { 695 return func() query.Q { 696 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q) 697 } 698 } 699 700 search := func(b *testing.B, q query.Q, wantFiles int) { 701 b.Helper() 702 703 res, err := ss.Search(ctx, q, opts) 704 if err != nil { 705 b.Fatalf("Search(%s): %v", q, err) 706 } 707 if have := len(res.Files); have != wantFiles { 708 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles) 709 } 710 } 711 712 benchmarks := []struct { 713 name string 714 q func() query.Q 715 wantFiles int 716 }{ 717 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, 718 {"substring no results", func() query.Q { return helloworldSub }, 0}, 719 {"substring some results", func() query.Q { return needleSub }, len(repos)}, 720 721 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo}, 722 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo}, 723 724 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo}, 725 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)}, 726 {"substring no results and repo set", setAnd(helloworldSub), 0}, 727 } 728 729 for _, bb := range benchmarks { 730 b.Run(bb.name, func(b *testing.B) { 731 b.ReportAllocs() 732 733 for n := 0; n < b.N; n++ { 734 q := bb.q() 735 736 search(b, q, bb.wantFiles) 737 } 738 }) 739 } 740} 741 742func TestRawQuerySearch(t *testing.T) { 743 ss := newShardedSearcher(1) 744 745 var nextShardNum int 746 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { 747 r := &zoekt.Repository{Name: repo} 748 r.RawConfig = rawConfig 749 b := testIndexBuilder(t, r, docs...) 750 shard := searcherForTest(t, b) 751 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 752 nextShardNum++ 753 } 754 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 755 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 756 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 757 758 cases := []struct { 759 pattern string 760 flags query.RawConfig 761 wantRepos []string 762 wantFiles int 763 }{ 764 { 765 pattern: "bas", 766 flags: query.RcOnlyPublic, 767 wantRepos: []string{"public"}, 768 wantFiles: 1, 769 }, 770 { 771 pattern: "foo", 772 flags: query.RcOnlyPublic, 773 wantRepos: []string{"public", "public_fork"}, 774 wantFiles: 2, 775 }, 776 { 777 pattern: "foo", 778 flags: query.RcOnlyPublic | query.RcNoForks, 779 wantRepos: []string{"public"}, 780 wantFiles: 1, 781 }, 782 { 783 pattern: "bar", 784 flags: query.RcOnlyForks, 785 wantRepos: []string{"public_fork"}, 786 wantFiles: 1, 787 }, 788 { 789 pattern: "bas", 790 flags: query.RcNoArchived, 791 wantRepos: []string{"public"}, 792 wantFiles: 1, 793 }, 794 { 795 pattern: "foo", 796 flags: query.RcNoForks, 797 wantRepos: []string{"public", "private_archived"}, 798 wantFiles: 2, 799 }, 800 { 801 pattern: "bas", 802 flags: query.RcOnlyArchived, 803 wantRepos: []string{"private_archived"}, 804 wantFiles: 1, 805 }, 806 { 807 pattern: "foo", 808 flags: query.RcOnlyPrivate, 809 wantRepos: []string{"private_archived"}, 810 wantFiles: 1, 811 }, 812 { 813 pattern: "foo", 814 flags: query.RcOnlyPrivate | query.RcNoArchived, 815 wantRepos: []string{}, 816 wantFiles: 0, 817 }, 818 } 819 for _, c := range cases { 820 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) { 821 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags) 822 823 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 824 if err != nil { 825 t.Fatal(err) 826 } 827 828 if got := len(sr.Files); got != c.wantFiles { 829 t.Fatalf("wanted %d, got %d", c.wantFiles, got) 830 } 831 832 if c.wantFiles == 0 { 833 return 834 } 835 836 gotRepos := make([]string, 0, len(sr.RepoURLs)) 837 for k := range sr.RepoURLs { 838 gotRepos = append(gotRepos, k) 839 } 840 sort.Strings(gotRepos) 841 sort.Strings(c.wantRepos) 842 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" { 843 t.Fatalf("(-want, +got):\n%s", d) 844 } 845 }) 846 } 847} 848 849func TestPrioritySlice(t *testing.T) { 850 p := &prioritySlice{} 851 for step, oper := range []struct { 852 isAppend bool 853 value float64 854 expectedMax float64 855 }{ 856 {true, 1, 1}, 857 {true, 3, 3}, 858 {true, 2, 3}, 859 {false, 1, 3}, 860 {false, 3, 2}, 861 {false, 2, math.Inf(-1)}, 862 } { 863 if oper.isAppend { 864 p.append(oper.value) 865 } else { 866 p.remove(oper.value) 867 } 868 max := p.max() 869 if max != oper.expectedMax { 870 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax) 871 } 872 } 873} 874 875func TestSendByRepository(t *testing.T) { 876 wantStats := zoekt.Stats{} 877 wantStats.ShardsScanned = 1 878 879 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this 880 // test. 881 f := func(n1, n2, n3 uint8) bool { 882 sr := createMockSearchResult(n1, n2, n3, wantStats) 883 884 mock := &mockSender{} 885 sendByRepository(sr, &zoekt.SearchOptions{}, mock) 886 887 if diff := cmp.Diff(wantStats, mock.stats); diff != "" { 888 t.Logf("-want,+got\n%s", diff) 889 return false 890 } 891 892 nonZero := 0 893 for _, l := range []uint8{n1, n2, n3} { 894 if l > 0 { 895 nonZero++ 896 } 897 } 898 if l := len(mock.files); l != nonZero { 899 t.Logf("wanted results from %d repositores, got %d", nonZero, l) 900 return false 901 } 902 903 gotTotal := 0 904 for _, fs := range mock.files { 905 gotTotal += len(fs) 906 } 907 wantTotal := int(n1) + int(n2) + int(n3) 908 if gotTotal != wantTotal { 909 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal) 910 return false 911 } 912 913 for _, fs := range mock.files { 914 if len(fs) == 0 { 915 t.Logf("got search result with 0 file matches after split") 916 return false 917 } 918 } 919 return true 920 } 921 922 if err := quick.Check(f, nil); err != nil { 923 t.Error(err) 924 } 925} 926 927type mockSender struct { 928 stats zoekt.Stats 929 files [][]zoekt.FileMatch 930} 931 932func (s *mockSender) Send(sr *zoekt.SearchResult) { 933 s.stats.Add(sr.Stats) 934 if len(sr.Files) == 0 { 935 return 936 } 937 s.files = append(s.files, sr.Files) 938} 939 940func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult { 941 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)} 942 for i, n := range []uint8{n1, n2, n3} { 943 if n == 0 { 944 continue 945 } 946 tmp := mkSearchResult(int(n), uint32(i)) 947 sr.Files = append(sr.Files, tmp.Files...) 948 for k := range tmp.RepoURLs { 949 sr.RepoURLs[k] = "" 950 } 951 } 952 sr.Stats = stats 953 return sr 954} 955 956func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { 957 if n == 0 { 958 return &zoekt.SearchResult{} 959 } 960 fm := make([]zoekt.FileMatch, 0, n) 961 for i := 0; i < n; i++ { 962 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) 963 } 964 965 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}} 966} 967 968func TestFileBasedSearch(t *testing.T) { 969 cases := []struct { 970 name string 971 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 972 }{ 973 {"Search", testShardedSearch}, 974 {"StreamSearch", testShardedStreamSearch}, 975 } 976 977 c1 := []byte("I love bananas without skin") 978 // -----------0123456789012345678901234567890123456789 979 c2 := []byte("In Dutch, ananas means pineapple") 980 // -----------0123456789012345678901234567890123456789 981 b := testIndexBuilder(t, nil, 982 zoekt.Document{Name: "f1", Content: c1}, 983 zoekt.Document{Name: "f2", Content: c2}, 984 ) 985 986 for _, tt := range cases { 987 for _, useDocumentRanks := range []bool{false, true} { 988 t.Run(tt.name, func(t *testing.T) { 989 matches := tt.testShardedSearch(t, &query.Substring{ 990 CaseSensitive: false, 991 Pattern: "ananas", 992 }, b, useDocumentRanks) 993 994 if len(matches) != 2 { 995 t.Fatalf("got %v, want 2 matches", matches) 996 } 997 if matches[0].FileName != "f2" || matches[1].FileName != "f1" { 998 t.Fatalf("got %v, want matches {f1,f2}", matches) 999 } 1000 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 { 1001 t.Fatalf("got %#v, want offsets 10,8", matches) 1002 } 1003 }) 1004 } 1005 } 1006} 1007 1008func TestWordBoundaryRanking(t *testing.T) { 1009 cases := []struct { 1010 name string 1011 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1012 }{ 1013 {"Search", testShardedSearch}, 1014 {"StreamSearch", testShardedStreamSearch}, 1015 } 1016 1017 b := testIndexBuilder(t, nil, 1018 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1019 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1020 // -----------------------------------------0123456 789012 34567890 1021 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1022 1023 for _, tt := range cases { 1024 for _, useDocumentRanks := range []bool{false, true} { 1025 t.Run(tt.name, func(t *testing.T) { 1026 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks) 1027 1028 if len(files) != 3 { 1029 t.Fatalf("got %#v, want 3 files", files) 1030 } 1031 1032 file0 := files[0] 1033 if file0.FileName != "f2" || len(file0.LineMatches) != 3 { 1034 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0) 1035 } 1036 1037 if file0.LineMatches[0].LineFragments[0].Offset != 13 { 1038 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0]) 1039 } 1040 if file0.LineMatches[1].LineFragments[0].Offset != 7 { 1041 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1]) 1042 } 1043 }) 1044 } 1045 } 1046} 1047 1048func TestAtomCountScore(t *testing.T) { 1049 cases := []struct { 1050 name string 1051 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1052 }{ 1053 {"Search", testShardedSearch}, 1054 {"StreamSearch", testShardedStreamSearch}, 1055 } 1056 1057 b := testIndexBuilder(t, 1058 &zoekt.Repository{ 1059 Branches: []zoekt.RepositoryBranch{ 1060 {Name: "branches", Version: "v1"}, 1061 {Name: "needle", Version: "v2"}, 1062 }, 1063 }, 1064 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1065 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1066 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1067 1068 for _, tt := range cases { 1069 for _, useDocumentRanks := range []bool{false, true} { 1070 t.Run(tt.name, func(t *testing.T) { 1071 files := tt.testShardedSearch(t, 1072 query.NewOr( 1073 &query.Substring{Pattern: "needle"}, 1074 &query.Substring{Pattern: "needle", FileName: true}, 1075 &query.Branch{Pattern: "needle"}, 1076 ), b, useDocumentRanks) 1077 var got []string 1078 for _, f := range files { 1079 got = append(got, f.FileName) 1080 } 1081 want := []string{"needle-file-branch", "needle-file", "f1"} 1082 if !reflect.DeepEqual(got, want) { 1083 t.Errorf("got %v, want %v", got, want) 1084 } 1085 }) 1086 } 1087 } 1088} 1089 1090func TestUseBM25Scoring(t *testing.T) { 1091 b := testIndexBuilder(t, 1092 &zoekt.Repository{}, 1093 zoekt.Document{Name: "f1", Content: []byte("one two two three")}, 1094 zoekt.Document{Name: "f2", Content: []byte("one two one two")}, 1095 zoekt.Document{Name: "f3", Content: []byte("one three three three")}) 1096 1097 ss := newShardedSearcher(1) 1098 searcher := searcherForTest(t, b) 1099 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1100 1101 q := query.NewOr( 1102 &query.Substring{Pattern: "one"}, 1103 &query.Substring{Pattern: "three"}) 1104 1105 opts := zoekt.SearchOptions{ 1106 UseBM25Scoring: true, 1107 } 1108 1109 results, err := ss.Search(context.Background(), q, &opts) 1110 if err != nil { 1111 t.Fatal(err) 1112 } 1113 1114 var got []string 1115 for _, f := range results.Files { 1116 got = append(got, f.FileName) 1117 } 1118 1119 want := []string{"f3", "f1", "f2"} 1120 if !reflect.DeepEqual(got, want) { 1121 t.Errorf("got %v, want %v", got, want) 1122 } 1123} 1124 1125func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1126 ss := newShardedSearcher(1) 1127 searcher := searcherForTest(t, ib) 1128 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1129 1130 var files []zoekt.FileMatch 1131 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) { 1132 files = append(files, result.Files...) 1133 }) 1134 1135 opts := zoekt.SearchOptions{} 1136 if useDocumentRanks { 1137 opts.UseDocumentRanks = true 1138 opts.FlushWallTime = 10 * time.Millisecond 1139 } 1140 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil { 1141 t.Fatal(err) 1142 } 1143 return files 1144} 1145 1146func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1147 ss := newShardedSearcher(1) 1148 searcher := searcherForTest(t, ib) 1149 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1150 1151 opts := zoekt.SearchOptions{} 1152 if useDocumentRanks { 1153 opts.UseDocumentRanks = true 1154 opts.FlushWallTime = 50 * time.Millisecond 1155 } 1156 sres, _ := ss.Search(context.Background(), q, &opts) 1157 return sres.Files 1158} 1159 1160// Ensure we work on empty shard directories. 1161func TestNewDirectorySearcher_empty(t *testing.T) { 1162 ctx := context.Background() 1163 1164 test := func(t *testing.T, ss zoekt.Streamer) { 1165 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1166 if err != nil { 1167 t.Fatal("Search non-nil error", err) 1168 } 1169 1170 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" { 1171 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff) 1172 } 1173 1174 rl, err := ss.List(ctx, &query.Const{Value: true}, nil) 1175 if err != nil { 1176 t.Fatal("List non-nil error", err) 1177 } 1178 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" { 1179 t.Fatalf("List had non empty results (-want, +got):\n%s", diff) 1180 } 1181 } 1182 1183 dir := t.TempDir() 1184 t.Run("blocking", func(t *testing.T) { 1185 ss, err := NewDirectorySearcher(dir) 1186 if err != nil { 1187 t.Fatal(err) 1188 } 1189 t.Cleanup(ss.Close) 1190 1191 // We expect crashes to be empty as soon as NewDirectorySearcher returns 1192 // so we can validate straight away. 1193 test(t, ss) 1194 }) 1195 1196 t.Run("fast", func(t *testing.T) { 1197 ss, err := NewDirectorySearcherFast(dir) 1198 if err != nil { 1199 t.Fatal(err) 1200 } 1201 t.Cleanup(ss.Close) 1202 1203 deadline := testDeadline(t, 10*time.Second) 1204 1205 // Wait for scanning of directory to be done. We should be returning 1206 // non-zero crashes until then. 1207 waitForPredicate(deadline, 10*time.Millisecond, func() bool { 1208 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1209 if err != nil { 1210 t.Fatal(err) 1211 } 1212 return res.Stats.Crashes == 0 1213 }) 1214 1215 test(t, ss) 1216 }) 1217} 1218 1219// testDeadline returns the deadline for t, but ensures it is no longer than 1220// maxTimeout away. 1221func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time { 1222 deadline := time.Now().Add(maxTimeout) 1223 if d, ok := t.Deadline(); ok && d.Before(deadline) { 1224 // give 1s for us to do a final test run 1225 deadline = d.Add(-time.Second) 1226 } 1227 return deadline 1228} 1229 1230func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool { 1231 for time.Now().Before(deadline) { 1232 if pred() { 1233 return true 1234 } 1235 1236 time.Sleep(tick) 1237 } 1238 1239 return pred() 1240}