fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package shards 16 17import ( 18 "bytes" 19 "context" 20 "flag" 21 "fmt" 22 "hash/fnv" 23 "io" 24 "log" 25 "math" 26 "os" 27 "reflect" 28 "runtime" 29 "sort" 30 "strconv" 31 "testing" 32 "testing/quick" 33 "time" 34 35 "github.com/RoaringBitmap/roaring" 36 "github.com/google/go-cmp/cmp" 37 "github.com/google/go-cmp/cmp/cmpopts" 38 "github.com/grafana/regexp" 39 "github.com/sourcegraph/zoekt/index" 40 41 "github.com/sourcegraph/zoekt" 42 "github.com/sourcegraph/zoekt/query" 43) 44 45func TestMain(m *testing.M) { 46 flag.Parse() 47 if !testing.Verbose() { 48 log.SetOutput(io.Discard) 49 } 50 os.Exit(m.Run()) 51} 52 53type crashSearcher struct{} 54 55func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 56 panic("search") 57} 58 59func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 60 panic("list") 61} 62 63func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) { 64 return &zoekt.RepoStats{}, nil 65} 66 67func (s *crashSearcher) Close() {} 68 69func (s *crashSearcher) String() string { return "crashSearcher" } 70 71func TestCrashResilience(t *testing.T) { 72 out := &bytes.Buffer{} 73 oldOut := log.Writer() 74 log.SetOutput(out) 75 defer log.SetOutput(oldOut) 76 77 ss := newShardedSearcher(2) 78 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) 79 80 var wantCrashes int 81 test := func(t *testing.T) { 82 q := &query.Substring{Pattern: "hoi"} 83 opts := &zoekt.SearchOptions{} 84 if res, err := ss.Search(context.Background(), q, opts); err != nil { 85 t.Fatalf("Search: %v", err) 86 } else if res.Stats.Crashes != wantCrashes { 87 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes) 88 } 89 90 if res, err := ss.List(context.Background(), q, nil); err != nil { 91 t.Fatalf("List: %v", err) 92 } else if res.Crashes != wantCrashes { 93 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes) 94 } 95 } 96 97 // Before we are marked as ready we have one extra crash 98 wantCrashes = 2 99 t.Run("loading", test) 100 101 // After marking as ready we should only have the crashSearcher 102 // contributing. 103 ss.markReady() 104 wantCrashes = 1 105 t.Run("ready", test) 106} 107 108type rankSearcher struct { 109 rank uint16 110 repo *zoekt.Repository 111} 112 113func (s *rankSearcher) Close() { 114} 115 116func (s *rankSearcher) String() string { 117 return "" 118} 119 120func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 121 select { 122 case <-ctx.Done(): 123 return &zoekt.SearchResult{}, nil 124 default: 125 } 126 127 // Ugly, but without sleep it's too fast, and we can't 128 // simulate the cutoff. 129 time.Sleep(time.Millisecond) 130 return &zoekt.SearchResult{ 131 Files: []zoekt.FileMatch{ 132 { 133 FileName: fmt.Sprintf("f%d", s.rank), 134 Score: float64(s.rank), 135 }, 136 }, 137 Stats: zoekt.Stats{ 138 MatchCount: 1, 139 }, 140 }, nil 141} 142 143func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 144 r := zoekt.Repository{} 145 if s.repo != nil { 146 r = *s.repo 147 } 148 r.Rank = s.rank 149 return &zoekt.RepoList{ 150 Repos: []*zoekt.RepoListEntry{ 151 {Repository: r}, 152 }, 153 }, nil 154} 155 156func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo } 157 158func TestOrderByShard(t *testing.T) { 159 ss := newShardedSearcher(1) 160 161 n := 10 * runtime.GOMAXPROCS(0) 162 for i := 0; i < n; i++ { 163 ss.replace(map[string]zoekt.Searcher{ 164 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, 165 }) 166 } 167 168 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil { 169 t.Errorf("Search: %v", err) 170 } else if len(res.Files) != n { 171 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n) 172 } 173 174 opts := zoekt.SearchOptions{ 175 TotalMaxMatchCount: 3, 176 } 177 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts) 178 if err != nil { 179 t.Errorf("Search: %v", err) 180 } 181 182 if len(res.Files) < opts.TotalMaxMatchCount { 183 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount) 184 } 185 if len(res.Files) == n { 186 t.Errorf("got %d results, want < %d", len(res.Files), n) 187 } 188 for i, f := range res.Files { 189 rev := n - 1 - i 190 want := fmt.Sprintf("f%d", rev) 191 got := f.FileName 192 193 if got != want { 194 t.Logf("%d: got %q, want %q", i, got, want) 195 } 196 } 197} 198 199func TestShardedSearcher_Ranking(t *testing.T) { 200 ss := newShardedSearcher(1) 201 202 var nextShardNum int 203 addShard := func(repo string, priority float64, docs ...index.Document) { 204 r := &zoekt.Repository{ID: hash(repo), Name: repo} 205 r.RawConfig = map[string]string{ 206 "public": "1", 207 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 208 } 209 b := testIndexBuilder(t, r, docs...) 210 shard := searcherForTest(t, b) 211 ss.replace(map[string]zoekt.Searcher{ 212 fmt.Sprintf("key-%d", nextShardNum): shard, 213 }) 214 nextShardNum++ 215 } 216 217 addShard("weekend-project", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) 218 addShard("moderately-popular", 500, index.Document{Name: "f3", Content: []byte("foo bar")}) 219 addShard("weekend-project-2", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) 220 addShard("super-star", 5000, index.Document{Name: "f1", Content: []byte("foo bar bas")}) 221 222 want := []string{ 223 "super-star", 224 "moderately-popular", 225 "weekend-project", 226 "weekend-project-2", 227 } 228 229 var have []string 230 for _, s := range ss.getLoaded().shards { 231 for _, r := range s.repos { 232 have = append(have, r.Name) 233 } 234 } 235 236 if !reflect.DeepEqual(want, have) { 237 t.Fatalf("\nwant: %s\nhave: %s", want, have) 238 } 239} 240 241func TestShardedSearcher_DocumentRanking(t *testing.T) { 242 ss := newShardedSearcher(1) 243 244 var nextShardNum int 245 addShard := func(repo string, rank uint16, docs ...index.Document) { 246 r := &zoekt.Repository{ID: hash(repo), Name: repo} 247 r.RawConfig = map[string]string{ 248 "public": "1", 249 } 250 r.Rank = rank 251 b := testIndexBuilder(t, r, docs...) 252 shard := searcherForTest(t, b) 253 ss.replace(map[string]zoekt.Searcher{ 254 fmt.Sprintf("key-%d", nextShardNum): shard, 255 }) 256 nextShardNum++ 257 } 258 259 addShard("old-project", 1, index.Document{Name: "f1", Content: []byte("foobar")}) 260 addShard("recent", 2, index.Document{Name: "f2", Content: []byte("foobaz")}) 261 addShard("old-project-2", 1, index.Document{Name: "f3", Content: []byte("foo bar")}) 262 addShard("new", 3, index.Document{Name: "f4", Content: []byte("foo baz")}, 263 index.Document{Name: "f5", Content: []byte("fooooo")}) 264 265 // Run a stream search and gather the results 266 var results []*zoekt.SearchResult 267 opts := &zoekt.SearchOptions{ 268 FlushWallTime: 100 * time.Millisecond, 269 } 270 271 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts, 272 zoekt.SenderFunc(func(event *zoekt.SearchResult) { 273 results = append(results, event) 274 })) 275 if err != nil { 276 t.Fatal(err) 277 } 278 279 // There should always be two stream results, first progress-only, then the file results 280 if len(results) != 2 { 281 t.Fatalf("expected 2 streamed results, but got %d", len(results)) 282 } 283 284 // The ranking should be determined by whether it's an exact word match, 285 // followed by repository priority 286 want := []string{"f4", "f3", "f5", "f2", "f1"} 287 288 files := results[1].Files 289 got := make([]string, len(files)) 290 for i := 0; i < len(files); i++ { 291 got[i] = files[i].FileName 292 } 293 294 if !reflect.DeepEqual(got, want) { 295 t.Errorf("got %v, want %v", got, want) 296 } 297} 298 299func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { 300 ss := newShardedSearcher(1) 301 302 // namePrefix is so we can create a repo:foo filter and match the same set 303 // of repos. 304 namePrefix := [3]string{"foo", "bar", "baz"} 305 306 repoSetNames := []string{} 307 repoIDs := []uint32{} 308 n := 10 * runtime.GOMAXPROCS(0) 309 for i := 0; i < n; i++ { 310 shardName := fmt.Sprintf("shard%d", i) 311 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i) 312 repoID := hash(repoName) 313 314 if i%3 == 0 { 315 repoSetNames = append(repoSetNames, repoName) 316 repoIDs = append(repoIDs, repoID) 317 } 318 319 ss.replace(map[string]zoekt.Searcher{ 320 shardName: &rankSearcher{ 321 repo: &zoekt.Repository{ID: repoID, Name: repoName}, 322 rank: uint16(n - i), 323 }, 324 }) 325 } 326 327 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}) 328 if err != nil { 329 t.Errorf("Search: %v", err) 330 } 331 if len(res.Files) != n { 332 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 333 } 334 335 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{ 336 {Branch: "HEAD", Repos: roaring.New()}, 337 }} 338 339 for _, name := range repoSetNames { 340 branchesRepos.List[0].Repos.Add(hash(name)) 341 } 342 343 set := query.NewRepoSet(repoSetNames...) 344 sub := &query.Substring{Pattern: "bla"} 345 346 repoIDsQuery := query.NewRepoIDs(repoIDs...) 347 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")} 348 349 queries := []query.Q{ 350 query.NewAnd(set, sub), 351 // Test with the same reposet again 352 query.NewAnd(set, sub), 353 354 query.NewAnd(branchesRepos, sub), 355 // Test with the same repoBranches with IDs again 356 query.NewAnd(branchesRepos, sub), 357 358 query.NewAnd(repoIDsQuery, sub), 359 // Test with the same repoIDs again 360 query.NewAnd(repoIDsQuery, sub), 361 362 query.NewAnd(repoQuery, sub), 363 query.NewAnd(repoQuery, sub), 364 365 // List has queries which are just the reposet atoms. We also test twice. 366 set, 367 set, 368 branchesRepos, 369 branchesRepos, 370 repoIDsQuery, 371 repoIDsQuery, 372 repoQuery, 373 repoQuery, 374 } 375 376 for _, q := range queries { 377 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 378 if err != nil { 379 t.Errorf("Search(%s): %v", q, err) 380 } 381 // Note: Assertion is based on fact that `rankSearcher` always returns a 382 // result and using repoSet will half the number of results 383 if len(res.Files) != len(repoSetNames) { 384 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 385 } 386 } 387} 388 389func hash(name string) uint32 { 390 h := fnv.New32() 391 h.Write([]byte(name)) 392 return h.Sum32() 393} 394 395type memSeeker struct { 396 data []byte 397} 398 399func (s *memSeeker) Name() string { 400 return "memseeker" 401} 402 403func (s *memSeeker) Close() {} 404func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 405 return s.data[off : off+sz], nil 406} 407 408func (s *memSeeker) Size() (uint32, error) { 409 return uint32(len(s.data)), nil 410} 411 412func TestUnloadIndex(t *testing.T) { 413 b := testIndexBuilder(t, nil, index.Document{ 414 Name: "filename", 415 Content: []byte("needle needle needle"), 416 }) 417 418 var buf bytes.Buffer 419 if err := b.Write(&buf); err != nil { 420 t.Fatal(err) 421 } 422 indexBytes := buf.Bytes() 423 indexFile := &memSeeker{indexBytes} 424 searcher, err := index.NewSearcher(indexFile) 425 if err != nil { 426 t.Fatalf("NewSearcher: %v", err) 427 } 428 429 ss := newShardedSearcher(2) 430 ss.replace(map[string]zoekt.Searcher{"key": searcher}) 431 432 var opts zoekt.SearchOptions 433 q := &query.Substring{Pattern: "needle"} 434 res, err := ss.Search(context.Background(), q, &opts) 435 if err != nil { 436 t.Fatalf("Search(%s): %v", q, err) 437 } 438 439 forbidden := byte(29) 440 for i := range indexBytes { 441 // non-ASCII 442 indexBytes[i] = forbidden 443 } 444 445 for _, f := range res.Files { 446 if bytes.Contains(f.Content, []byte{forbidden}) { 447 t.Errorf("found %d in content %q", forbidden, f.Content) 448 } 449 if bytes.Contains(f.Checksum, []byte{forbidden}) { 450 t.Errorf("found %d in checksum %q", forbidden, f.Checksum) 451 } 452 453 for _, l := range f.LineMatches { 454 if bytes.Contains(l.Line, []byte{forbidden}) { 455 t.Errorf("found %d in line %q", forbidden, l.Line) 456 } 457 } 458 } 459} 460 461func TestShardedSearcher_List(t *testing.T) { 462 repos := []*zoekt.Repository{ 463 { 464 ID: 1234, 465 Name: "repo-a", 466 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 467 RawConfig: map[string]string{"repoid": "1234"}, 468 }, 469 { 470 Name: "repo-b", 471 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 472 }, 473 } 474 475 doc := index.Document{ 476 Name: "foo.go", 477 Content: []byte("bar\nbaz"), 478 Branches: []string{"main", "dev"}, 479 } 480 481 // Test duplicate removal when ListOptions.Minimal is true and false 482 ss := newShardedSearcher(4) 483 ss.replace(map[string]zoekt.Searcher{ 484 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), 485 "2": searcherForTest(t, testIndexBuilder(t, repos[0])), 486 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), 487 "4": searcherForTest(t, testIndexBuilder(t, repos[1])), 488 }) 489 ss.markReady() 490 491 stats := zoekt.RepoStats{ 492 Shards: 2, 493 Documents: 1, 494 IndexBytes: 196, 495 ContentBytes: 13, 496 NewLinesCount: 1, 497 DefaultBranchNewLinesCount: 1, 498 OtherBranchesNewLinesCount: 1, 499 } 500 501 aggStats := stats 502 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works 503 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes. 504 505 for _, tc := range []struct { 506 name string 507 opts *zoekt.ListOptions 508 want *zoekt.RepoList 509 }{ 510 { 511 name: "nil opts", 512 opts: nil, 513 want: &zoekt.RepoList{ 514 Repos: []*zoekt.RepoListEntry{ 515 { 516 Repository: *repos[0], 517 Stats: stats, 518 }, 519 { 520 Repository: *repos[1], 521 Stats: stats, 522 }, 523 }, 524 Stats: aggStats, 525 }, 526 }, 527 { 528 name: "default", 529 opts: &zoekt.ListOptions{}, 530 want: &zoekt.RepoList{ 531 Repos: []*zoekt.RepoListEntry{ 532 { 533 Repository: *repos[0], 534 Stats: stats, 535 }, 536 { 537 Repository: *repos[1], 538 Stats: stats, 539 }, 540 }, 541 Stats: aggStats, 542 }, 543 }, 544 { 545 name: "field=repos", 546 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 547 want: &zoekt.RepoList{ 548 Repos: []*zoekt.RepoListEntry{ 549 { 550 Repository: *repos[0], 551 Stats: stats, 552 }, 553 { 554 Repository: *repos[1], 555 Stats: stats, 556 }, 557 }, 558 Stats: aggStats, 559 }, 560 }, 561 { 562 name: "field=reposmap", 563 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 564 want: &zoekt.RepoList{ 565 Repos: []*zoekt.RepoListEntry{ 566 { 567 Repository: *repos[1], 568 Stats: stats, 569 }, 570 }, 571 ReposMap: zoekt.ReposMap{ 572 repos[0].ID: { 573 HasSymbols: repos[0].HasSymbols, 574 Branches: repos[0].Branches, 575 }, 576 }, 577 Stats: aggStats, 578 }, 579 }, 580 } { 581 tc := tc 582 t.Run(tc.name, func(t *testing.T) { 583 t.Parallel() 584 585 q := &query.Repo{Regexp: regexp.MustCompile("repo")} 586 587 res, err := ss.List(context.Background(), q, tc.opts) 588 if err != nil { 589 t.Fatalf("List(%v, %s): %v", q, tc.opts, err) 590 } 591 592 sort.Slice(res.Repos, func(i, j int) bool { 593 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name 594 }) 595 596 ignored := []cmp.Option{ 597 cmpopts.EquateEmpty(), 598 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 599 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 600 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 601 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 602 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 603 } 604 605 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" { 606 t.Fatalf("mismatch (-want +got):\n%s", diff) 607 } 608 }) 609 } 610} 611 612func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...index.Document) *index.IndexBuilder { 613 b, err := index.NewIndexBuilder(repo) 614 if err != nil { 615 t.Fatalf("NewIndexBuilder: %v", err) 616 } 617 618 for i, d := range docs { 619 if err := b.Add(d); err != nil { 620 t.Fatalf("Add %d: %v", i, err) 621 } 622 } 623 return b 624} 625 626func searcherForTest(t testing.TB, b *index.IndexBuilder) zoekt.Searcher { 627 var buf bytes.Buffer 628 if err := b.Write(&buf); err != nil { 629 t.Fatal(err) 630 } 631 f := &memSeeker{buf.Bytes()} 632 633 searcher, err := index.NewSearcher(f) 634 if err != nil { 635 t.Fatalf("NewSearcher: %v", err) 636 } 637 638 return searcher 639} 640 641func reposForTest(n int) (result []*zoekt.Repository) { 642 for i := 0; i < n; i++ { 643 result = append(result, &zoekt.Repository{ 644 ID: uint32(i + 1), 645 Name: fmt.Sprintf("test-repository-%d", i), 646 }) 647 } 648 return result 649} 650 651func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 652 builder := testIndexBuilder(b, r) 653 654 if err := builder.Add(index.Document{ 655 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 656 Content: []byte("needle needle needle haystack"), 657 }); err != nil { 658 b.Fatal(err) 659 } 660 661 for i := 1; i < numFiles; i++ { 662 if err := builder.Add(index.Document{ 663 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 664 Content: []byte("haystack haystack haystack"), 665 }); err != nil { 666 b.Fatal(err) 667 } 668 } 669 670 return searcherForTest(b, builder) 671} 672 673func BenchmarkShardedSearch(b *testing.B) { 674 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0))) 675 676 filesPerRepo := 300 677 repos := reposForTest(3000) 678 var repoSetIDs []uint32 679 680 shards := make(map[string]zoekt.Searcher, len(repos)) 681 for i, r := range repos { 682 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo) 683 if i%2 == 0 { 684 repoSetIDs = append(repoSetIDs, r.ID) 685 } 686 } 687 688 ss.replace(shards) 689 690 ctx := context.Background() 691 opts := &zoekt.SearchOptions{} 692 693 needleSub := &query.Substring{Pattern: "needle"} 694 haystackSub := &query.Substring{Pattern: "haystack"} 695 helloworldSub := &query.Substring{Pattern: "helloworld"} 696 haystackCap, err := query.Parse("hay(s(t))(a)ck") 697 if err != nil { 698 b.Fatal(err) 699 } 700 701 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck") 702 if err != nil { 703 b.Fatal(err) 704 } 705 706 setAnd := func(q query.Q) func() query.Q { 707 return func() query.Q { 708 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q) 709 } 710 } 711 712 search := func(b *testing.B, q query.Q, wantFiles int) { 713 b.Helper() 714 715 res, err := ss.Search(ctx, q, opts) 716 if err != nil { 717 b.Fatalf("Search(%s): %v", q, err) 718 } 719 if have := len(res.Files); have != wantFiles { 720 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles) 721 } 722 } 723 724 benchmarks := []struct { 725 name string 726 q func() query.Q 727 wantFiles int 728 }{ 729 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, 730 {"substring no results", func() query.Q { return helloworldSub }, 0}, 731 {"substring some results", func() query.Q { return needleSub }, len(repos)}, 732 733 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo}, 734 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo}, 735 736 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo}, 737 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)}, 738 {"substring no results and repo set", setAnd(helloworldSub), 0}, 739 } 740 741 for _, bb := range benchmarks { 742 b.Run(bb.name, func(b *testing.B) { 743 b.ReportAllocs() 744 745 for n := 0; n < b.N; n++ { 746 q := bb.q() 747 748 search(b, q, bb.wantFiles) 749 } 750 }) 751 } 752} 753 754func TestRawQuerySearch(t *testing.T) { 755 ss := newShardedSearcher(1) 756 757 var nextShardNum int 758 addShard := func(repo string, rawConfig map[string]string, docs ...index.Document) { 759 r := &zoekt.Repository{Name: repo} 760 r.RawConfig = rawConfig 761 b := testIndexBuilder(t, r, docs...) 762 shard := searcherForTest(t, b) 763 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 764 nextShardNum++ 765 } 766 addShard("public", map[string]string{"public": "1"}, index.Document{Name: "f1", Content: []byte("foo bar bas")}) 767 addShard("private_archived", map[string]string{"archived": "1"}, index.Document{Name: "f2", Content: []byte("foo bas")}) 768 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, index.Document{Name: "f3", Content: []byte("foo bar")}) 769 770 cases := []struct { 771 pattern string 772 flags query.RawConfig 773 wantRepos []string 774 wantFiles int 775 }{ 776 { 777 pattern: "bas", 778 flags: query.RcOnlyPublic, 779 wantRepos: []string{"public"}, 780 wantFiles: 1, 781 }, 782 { 783 pattern: "foo", 784 flags: query.RcOnlyPublic, 785 wantRepos: []string{"public", "public_fork"}, 786 wantFiles: 2, 787 }, 788 { 789 pattern: "foo", 790 flags: query.RcOnlyPublic | query.RcNoForks, 791 wantRepos: []string{"public"}, 792 wantFiles: 1, 793 }, 794 { 795 pattern: "bar", 796 flags: query.RcOnlyForks, 797 wantRepos: []string{"public_fork"}, 798 wantFiles: 1, 799 }, 800 { 801 pattern: "bas", 802 flags: query.RcNoArchived, 803 wantRepos: []string{"public"}, 804 wantFiles: 1, 805 }, 806 { 807 pattern: "foo", 808 flags: query.RcNoForks, 809 wantRepos: []string{"public", "private_archived"}, 810 wantFiles: 2, 811 }, 812 { 813 pattern: "bas", 814 flags: query.RcOnlyArchived, 815 wantRepos: []string{"private_archived"}, 816 wantFiles: 1, 817 }, 818 { 819 pattern: "foo", 820 flags: query.RcOnlyPrivate, 821 wantRepos: []string{"private_archived"}, 822 wantFiles: 1, 823 }, 824 { 825 pattern: "foo", 826 flags: query.RcOnlyPrivate | query.RcNoArchived, 827 wantRepos: []string{}, 828 wantFiles: 0, 829 }, 830 } 831 for _, c := range cases { 832 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) { 833 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags) 834 835 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 836 if err != nil { 837 t.Fatal(err) 838 } 839 840 if got := len(sr.Files); got != c.wantFiles { 841 t.Fatalf("wanted %d, got %d", c.wantFiles, got) 842 } 843 844 if c.wantFiles == 0 { 845 return 846 } 847 848 gotRepos := make([]string, 0, len(sr.RepoURLs)) 849 for k := range sr.RepoURLs { 850 gotRepos = append(gotRepos, k) 851 } 852 sort.Strings(gotRepos) 853 sort.Strings(c.wantRepos) 854 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" { 855 t.Fatalf("(-want, +got):\n%s", d) 856 } 857 }) 858 } 859} 860 861func TestPrioritySlice(t *testing.T) { 862 p := &prioritySlice{} 863 for step, oper := range []struct { 864 isAppend bool 865 value float64 866 expectedMax float64 867 }{ 868 {true, 1, 1}, 869 {true, 3, 3}, 870 {true, 2, 3}, 871 {false, 1, 3}, 872 {false, 3, 2}, 873 {false, 2, math.Inf(-1)}, 874 } { 875 if oper.isAppend { 876 p.append(oper.value) 877 } else { 878 p.remove(oper.value) 879 } 880 max := p.max() 881 if max != oper.expectedMax { 882 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax) 883 } 884 } 885} 886 887func TestSendByRepository(t *testing.T) { 888 wantStats := zoekt.Stats{} 889 wantStats.ShardsScanned = 1 890 891 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this 892 // test. 893 f := func(n1, n2, n3 uint8) bool { 894 sr := createMockSearchResult(n1, n2, n3, wantStats) 895 896 mock := &mockSender{} 897 sendByRepository(sr, &zoekt.SearchOptions{}, mock) 898 899 if diff := cmp.Diff(wantStats, mock.stats); diff != "" { 900 t.Logf("-want,+got\n%s", diff) 901 return false 902 } 903 904 nonZero := 0 905 for _, l := range []uint8{n1, n2, n3} { 906 if l > 0 { 907 nonZero++ 908 } 909 } 910 if l := len(mock.files); l != nonZero { 911 t.Logf("wanted results from %d repositores, got %d", nonZero, l) 912 return false 913 } 914 915 gotTotal := 0 916 for _, fs := range mock.files { 917 gotTotal += len(fs) 918 } 919 wantTotal := int(n1) + int(n2) + int(n3) 920 if gotTotal != wantTotal { 921 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal) 922 return false 923 } 924 925 for _, fs := range mock.files { 926 if len(fs) == 0 { 927 t.Logf("got search result with 0 file matches after split") 928 return false 929 } 930 } 931 return true 932 } 933 934 if err := quick.Check(f, nil); err != nil { 935 t.Error(err) 936 } 937} 938 939type mockSender struct { 940 stats zoekt.Stats 941 files [][]zoekt.FileMatch 942} 943 944func (s *mockSender) Send(sr *zoekt.SearchResult) { 945 s.stats.Add(sr.Stats) 946 if len(sr.Files) == 0 { 947 return 948 } 949 s.files = append(s.files, sr.Files) 950} 951 952func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult { 953 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)} 954 for i, n := range []uint8{n1, n2, n3} { 955 if n == 0 { 956 continue 957 } 958 tmp := mkSearchResult(int(n), uint32(i)) 959 sr.Files = append(sr.Files, tmp.Files...) 960 for k := range tmp.RepoURLs { 961 sr.RepoURLs[k] = "" 962 } 963 } 964 sr.Stats = stats 965 return sr 966} 967 968func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { 969 if n == 0 { 970 return &zoekt.SearchResult{} 971 } 972 fm := make([]zoekt.FileMatch, 0, n) 973 for i := 0; i < n; i++ { 974 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) 975 } 976 977 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}} 978} 979 980func TestFileBasedSearch(t *testing.T) { 981 cases := []struct { 982 name string 983 testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 984 }{ 985 {"Search", testShardedSearch}, 986 {"StreamSearch", testShardedStreamSearch}, 987 } 988 989 c1 := []byte("I love bananas without skin") 990 // -----------0123456789012345678901234567890123456789 991 c2 := []byte("In Dutch, ananas means pineapple") 992 // -----------0123456789012345678901234567890123456789 993 b := testIndexBuilder(t, nil, 994 index.Document{Name: "f1", Content: c1}, 995 index.Document{Name: "f2", Content: c2}, 996 ) 997 998 for _, tt := range cases { 999 for _, useDocumentRanks := range []bool{false, true} { 1000 t.Run(tt.name, func(t *testing.T) { 1001 matches := tt.testShardedSearch(t, &query.Substring{ 1002 CaseSensitive: false, 1003 Pattern: "ananas", 1004 }, b, useDocumentRanks) 1005 1006 if len(matches) != 2 { 1007 t.Fatalf("got %v, want 2 matches", matches) 1008 } 1009 if matches[0].FileName != "f2" || matches[1].FileName != "f1" { 1010 t.Fatalf("got %v, want matches {f1,f2}", matches) 1011 } 1012 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 { 1013 t.Fatalf("got %#v, want offsets 10,8", matches) 1014 } 1015 }) 1016 } 1017 } 1018} 1019 1020func TestWordBoundaryRanking(t *testing.T) { 1021 cases := []struct { 1022 name string 1023 testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1024 }{ 1025 {"Search", testShardedSearch}, 1026 {"StreamSearch", testShardedStreamSearch}, 1027 } 1028 1029 b := testIndexBuilder(t, nil, 1030 index.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1031 index.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1032 // -----------------------------------------0123456 789012 34567890 1033 index.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1034 1035 for _, tt := range cases { 1036 for _, useDocumentRanks := range []bool{false, true} { 1037 t.Run(tt.name, func(t *testing.T) { 1038 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks) 1039 1040 if len(files) != 3 { 1041 t.Fatalf("got %#v, want 3 files", files) 1042 } 1043 1044 file0 := files[0] 1045 if file0.FileName != "f2" || len(file0.LineMatches) != 3 { 1046 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0) 1047 } 1048 1049 if file0.LineMatches[0].LineFragments[0].Offset != 13 { 1050 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0]) 1051 } 1052 if file0.LineMatches[1].LineFragments[0].Offset != 7 { 1053 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1]) 1054 } 1055 }) 1056 } 1057 } 1058} 1059 1060func TestAtomCountScore(t *testing.T) { 1061 cases := []struct { 1062 name string 1063 testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1064 }{ 1065 {"Search", testShardedSearch}, 1066 {"StreamSearch", testShardedStreamSearch}, 1067 } 1068 1069 b := testIndexBuilder(t, 1070 &zoekt.Repository{ 1071 Branches: []zoekt.RepositoryBranch{ 1072 {Name: "branches", Version: "v1"}, 1073 {Name: "needle", Version: "v2"}, 1074 }, 1075 }, 1076 index.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1077 index.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1078 index.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1079 1080 for _, tt := range cases { 1081 for _, useDocumentRanks := range []bool{false, true} { 1082 t.Run(tt.name, func(t *testing.T) { 1083 files := tt.testShardedSearch(t, 1084 query.NewOr( 1085 &query.Substring{Pattern: "needle"}, 1086 &query.Substring{Pattern: "needle", FileName: true}, 1087 &query.Branch{Pattern: "needle"}, 1088 ), b, useDocumentRanks) 1089 var got []string 1090 for _, f := range files { 1091 got = append(got, f.FileName) 1092 } 1093 want := []string{"needle-file-branch", "needle-file", "f1"} 1094 if !reflect.DeepEqual(got, want) { 1095 t.Errorf("got %v, want %v", got, want) 1096 } 1097 }) 1098 } 1099 } 1100} 1101 1102func TestUseBM25Scoring(t *testing.T) { 1103 b := testIndexBuilder(t, 1104 &zoekt.Repository{}, 1105 index.Document{Name: "f1", Content: []byte("one two two three")}, 1106 index.Document{Name: "f2", Content: []byte("one two one two")}, 1107 index.Document{Name: "f3", Content: []byte("one three three three")}) 1108 1109 ss := newShardedSearcher(1) 1110 searcher := searcherForTest(t, b) 1111 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1112 1113 q := query.NewOr( 1114 &query.Substring{Pattern: "one"}, 1115 &query.Substring{Pattern: "three"}) 1116 1117 opts := zoekt.SearchOptions{ 1118 UseBM25Scoring: true, 1119 } 1120 1121 results, err := ss.Search(context.Background(), q, &opts) 1122 if err != nil { 1123 t.Fatal(err) 1124 } 1125 1126 var got []string 1127 for _, f := range results.Files { 1128 got = append(got, f.FileName) 1129 } 1130 1131 want := []string{"f3", "f1", "f2"} 1132 if !reflect.DeepEqual(got, want) { 1133 t.Errorf("got %v, want %v", got, want) 1134 } 1135} 1136 1137func testShardedStreamSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1138 ss := newShardedSearcher(1) 1139 searcher := searcherForTest(t, ib) 1140 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1141 1142 var files []zoekt.FileMatch 1143 sender := zoekt.SenderFunc(func(result *zoekt.SearchResult) { 1144 files = append(files, result.Files...) 1145 }) 1146 1147 opts := zoekt.SearchOptions{} 1148 if useDocumentRanks { 1149 opts.FlushWallTime = 10 * time.Millisecond 1150 } 1151 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil { 1152 t.Fatal(err) 1153 } 1154 return files 1155} 1156 1157func testShardedSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1158 ss := newShardedSearcher(1) 1159 searcher := searcherForTest(t, ib) 1160 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1161 1162 opts := zoekt.SearchOptions{} 1163 if useDocumentRanks { 1164 opts.FlushWallTime = 50 * time.Millisecond 1165 } 1166 sres, _ := ss.Search(context.Background(), q, &opts) 1167 return sres.Files 1168} 1169 1170// Ensure we work on empty shard directories. 1171func TestNewDirectorySearcher_empty(t *testing.T) { 1172 ctx := context.Background() 1173 1174 test := func(t *testing.T, ss zoekt.Streamer) { 1175 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1176 if err != nil { 1177 t.Fatal("Search non-nil error", err) 1178 } 1179 1180 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" { 1181 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff) 1182 } 1183 1184 rl, err := ss.List(ctx, &query.Const{Value: true}, nil) 1185 if err != nil { 1186 t.Fatal("List non-nil error", err) 1187 } 1188 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" { 1189 t.Fatalf("List had non empty results (-want, +got):\n%s", diff) 1190 } 1191 } 1192 1193 dir := t.TempDir() 1194 t.Run("blocking", func(t *testing.T) { 1195 ss, err := NewDirectorySearcher(dir) 1196 if err != nil { 1197 t.Fatal(err) 1198 } 1199 t.Cleanup(ss.Close) 1200 1201 // We expect crashes to be empty as soon as NewDirectorySearcher returns 1202 // so we can validate straight away. 1203 test(t, ss) 1204 }) 1205 1206 t.Run("fast", func(t *testing.T) { 1207 ss, err := NewDirectorySearcherFast(dir) 1208 if err != nil { 1209 t.Fatal(err) 1210 } 1211 t.Cleanup(ss.Close) 1212 1213 deadline := testDeadline(t, 10*time.Second) 1214 1215 // Wait for scanning of directory to be done. We should be returning 1216 // non-zero crashes until then. 1217 waitForPredicate(deadline, 10*time.Millisecond, func() bool { 1218 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1219 if err != nil { 1220 t.Fatal(err) 1221 } 1222 return res.Stats.Crashes == 0 1223 }) 1224 1225 test(t, ss) 1226 }) 1227} 1228 1229// testDeadline returns the deadline for t, but ensures it is no longer than 1230// maxTimeout away. 1231func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time { 1232 deadline := time.Now().Add(maxTimeout) 1233 if d, ok := t.Deadline(); ok && d.Before(deadline) { 1234 // give 1s for us to do a final test run 1235 deadline = d.Add(-time.Second) 1236 } 1237 return deadline 1238} 1239 1240func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool { 1241 for time.Now().Before(deadline) { 1242 if pred() { 1243 return true 1244 } 1245 1246 time.Sleep(tick) 1247 } 1248 1249 return pred() 1250}