fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package shards 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "hash/fnv" 22 "log" 23 "math" 24 "os" 25 "reflect" 26 "runtime" 27 "sort" 28 "strconv" 29 "testing" 30 "testing/quick" 31 "time" 32 33 "github.com/RoaringBitmap/roaring" 34 "github.com/google/go-cmp/cmp" 35 "github.com/google/go-cmp/cmp/cmpopts" 36 "github.com/grafana/regexp" 37 38 "github.com/sourcegraph/zoekt" 39 "github.com/sourcegraph/zoekt/query" 40 "github.com/sourcegraph/zoekt/stream" 41) 42 43type crashSearcher struct{} 44 45func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 46 panic("search") 47} 48 49func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 50 panic("list") 51} 52 53func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) { 54 return &zoekt.RepoStats{}, nil 55} 56 57func (s *crashSearcher) Close() {} 58 59func (s *crashSearcher) String() string { return "crashSearcher" } 60 61func TestCrashResilience(t *testing.T) { 62 out := &bytes.Buffer{} 63 log.SetOutput(out) 64 defer log.SetOutput(os.Stderr) 65 ss := newShardedSearcher(2) 66 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) 67 68 var wantCrashes int 69 test := func(t *testing.T) { 70 q := &query.Substring{Pattern: "hoi"} 71 opts := &zoekt.SearchOptions{} 72 if res, err := ss.Search(context.Background(), q, opts); err != nil { 73 t.Fatalf("Search: %v", err) 74 } else if res.Stats.Crashes != wantCrashes { 75 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes) 76 } 77 78 if res, err := ss.List(context.Background(), q, nil); err != nil { 79 t.Fatalf("List: %v", err) 80 } else if res.Crashes != wantCrashes { 81 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes) 82 } 83 } 84 85 // Before we are marked as ready we have one extra crash 86 wantCrashes = 2 87 t.Run("loading", test) 88 89 // After marking as ready we should only have the crashSearcher 90 // contributing. 91 ss.markReady() 92 wantCrashes = 1 93 t.Run("ready", test) 94} 95 96type rankSearcher struct { 97 rank uint16 98 repo *zoekt.Repository 99} 100 101func (s *rankSearcher) Close() { 102} 103 104func (s *rankSearcher) String() string { 105 return "" 106} 107 108func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 109 select { 110 case <-ctx.Done(): 111 return &zoekt.SearchResult{}, nil 112 default: 113 } 114 115 // Ugly, but without sleep it's too fast, and we can't 116 // simulate the cutoff. 117 time.Sleep(time.Millisecond) 118 return &zoekt.SearchResult{ 119 Files: []zoekt.FileMatch{ 120 { 121 FileName: fmt.Sprintf("f%d", s.rank), 122 Score: float64(s.rank), 123 }, 124 }, 125 Stats: zoekt.Stats{ 126 MatchCount: 1, 127 }, 128 }, nil 129} 130 131func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 132 r := zoekt.Repository{} 133 if s.repo != nil { 134 r = *s.repo 135 } 136 r.Rank = s.rank 137 return &zoekt.RepoList{ 138 Repos: []*zoekt.RepoListEntry{ 139 {Repository: r}, 140 }, 141 }, nil 142} 143 144func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo } 145 146func TestOrderByShard(t *testing.T) { 147 ss := newShardedSearcher(1) 148 149 n := 10 * runtime.GOMAXPROCS(0) 150 for i := 0; i < n; i++ { 151 ss.replace(map[string]zoekt.Searcher{ 152 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, 153 }) 154 } 155 156 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil { 157 t.Errorf("Search: %v", err) 158 } else if len(res.Files) != n { 159 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n) 160 } 161 162 opts := zoekt.SearchOptions{ 163 TotalMaxMatchCount: 3, 164 } 165 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts) 166 if err != nil { 167 t.Errorf("Search: %v", err) 168 } 169 170 if len(res.Files) < opts.TotalMaxMatchCount { 171 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount) 172 } 173 if len(res.Files) == n { 174 t.Errorf("got %d results, want < %d", len(res.Files), n) 175 } 176 for i, f := range res.Files { 177 rev := n - 1 - i 178 want := fmt.Sprintf("f%d", rev) 179 got := f.FileName 180 181 if got != want { 182 t.Logf("%d: got %q, want %q", i, got, want) 183 } 184 } 185} 186 187func TestShardedSearcher_Ranking(t *testing.T) { 188 ss := newShardedSearcher(1) 189 190 var nextShardNum int 191 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 192 r := &zoekt.Repository{ID: hash(repo), Name: repo} 193 r.RawConfig = map[string]string{ 194 "public": "1", 195 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 196 } 197 b := testIndexBuilder(t, r, docs...) 198 shard := searcherForTest(t, b) 199 ss.replace(map[string]zoekt.Searcher{ 200 fmt.Sprintf("key-%d", nextShardNum): shard, 201 }) 202 nextShardNum++ 203 } 204 205 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 206 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 207 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 208 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 209 210 want := []string{ 211 "super-star", 212 "moderately-popular", 213 "weekend-project", 214 "weekend-project-2", 215 } 216 217 var have []string 218 for _, s := range ss.getLoaded().shards { 219 for _, r := range s.repos { 220 have = append(have, r.Name) 221 } 222 } 223 224 if !reflect.DeepEqual(want, have) { 225 t.Fatalf("\nwant: %s\nhave: %s", want, have) 226 } 227} 228 229func TestShardedSearcher_DocumentRanking(t *testing.T) { 230 ss := newShardedSearcher(1) 231 232 var nextShardNum int 233 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 234 r := &zoekt.Repository{ID: hash(repo), Name: repo} 235 r.RawConfig = map[string]string{ 236 "public": "1", 237 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 238 } 239 b := testIndexBuilder(t, r, docs...) 240 shard := searcherForTest(t, b) 241 ss.replace(map[string]zoekt.Searcher{ 242 fmt.Sprintf("key-%d", nextShardNum): shard, 243 }) 244 nextShardNum++ 245 } 246 247 addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 248 addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 249 addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 250 addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 251 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 252 253 // Run a stream search and gather the results 254 var results []*zoekt.SearchResult 255 opts := &zoekt.SearchOptions{ 256 UseDocumentRanks: true, 257 FlushWallTime: 100 * time.Millisecond, 258 } 259 260 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts, 261 stream.SenderFunc(func(event *zoekt.SearchResult) { 262 results = append(results, event) 263 })) 264 265 if err != nil { 266 t.Fatal(err) 267 } 268 269 // There should always be two stream results, first progress-only, then the file results 270 if len(results) != 2 { 271 t.Fatalf("expected 2 streamed results, but got %d", len(results)) 272 } 273 274 // The ranking should be determined by whether it's an exact word match, 275 // followed by repository priority 276 want := []string{"f4", "f3", "f5", "f2", "f1"} 277 278 files := results[1].Files 279 got := make([]string, len(files)) 280 for i := 0; i < len(files); i++ { 281 got[i] = files[i].FileName 282 } 283 284 if !reflect.DeepEqual(got, want) { 285 t.Errorf("got %v, want %v", got, want) 286 } 287} 288 289func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { 290 ss := newShardedSearcher(1) 291 292 repoSetNames := []string{} 293 repoIDs := []uint32{} 294 n := 10 * runtime.GOMAXPROCS(0) 295 for i := 0; i < n; i++ { 296 shardName := fmt.Sprintf("shard%d", i) 297 repoName := fmt.Sprintf("repository%.3d", i) 298 repoID := hash(repoName) 299 300 if i%3 == 0 { 301 repoSetNames = append(repoSetNames, repoName) 302 repoIDs = append(repoIDs, repoID) 303 } 304 305 ss.replace(map[string]zoekt.Searcher{ 306 shardName: &rankSearcher{ 307 repo: &zoekt.Repository{ID: repoID, Name: repoName}, 308 rank: uint16(n - i), 309 }, 310 }) 311 } 312 313 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}) 314 if err != nil { 315 t.Errorf("Search: %v", err) 316 } 317 if len(res.Files) != n { 318 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 319 } 320 321 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{ 322 {Branch: "HEAD", Repos: roaring.New()}, 323 }} 324 325 for _, name := range repoSetNames { 326 branchesRepos.List[0].Repos.Add(hash(name)) 327 } 328 329 set := query.NewRepoSet(repoSetNames...) 330 sub := &query.Substring{Pattern: "bla"} 331 332 repoIDsQuery := query.NewRepoIDs(repoIDs...) 333 334 queries := []query.Q{ 335 query.NewAnd(set, sub), 336 // Test with the same reposet again 337 query.NewAnd(set, sub), 338 339 query.NewAnd(branchesRepos, sub), 340 // Test with the same repoBranches with IDs again 341 query.NewAnd(branchesRepos, sub), 342 343 query.NewAnd(repoIDsQuery, sub), 344 // Test with the same repoIDs again 345 query.NewAnd(repoIDsQuery, sub), 346 } 347 348 for _, q := range queries { 349 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 350 if err != nil { 351 t.Errorf("Search(%s): %v", q, err) 352 } 353 // Note: Assertion is based on fact that `rankSearcher` always returns a 354 // result and using repoSet will half the number of results 355 if len(res.Files) != len(repoSetNames) { 356 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 357 } 358 } 359} 360 361func hash(name string) uint32 { 362 h := fnv.New32() 363 h.Write([]byte(name)) 364 return h.Sum32() 365} 366 367type memSeeker struct { 368 data []byte 369} 370 371func (s *memSeeker) Name() string { 372 return "memseeker" 373} 374 375func (s *memSeeker) Close() {} 376func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 377 return s.data[off : off+sz], nil 378} 379 380func (s *memSeeker) Size() (uint32, error) { 381 return uint32(len(s.data)), nil 382} 383 384func TestUnloadIndex(t *testing.T) { 385 b := testIndexBuilder(t, nil, zoekt.Document{ 386 Name: "filename", 387 Content: []byte("needle needle needle"), 388 }) 389 390 var buf bytes.Buffer 391 if err := b.Write(&buf); err != nil { 392 t.Fatal(err) 393 } 394 indexBytes := buf.Bytes() 395 indexFile := &memSeeker{indexBytes} 396 searcher, err := zoekt.NewSearcher(indexFile) 397 if err != nil { 398 t.Fatalf("NewSearcher: %v", err) 399 } 400 401 ss := newShardedSearcher(2) 402 ss.replace(map[string]zoekt.Searcher{"key": searcher}) 403 404 var opts zoekt.SearchOptions 405 q := &query.Substring{Pattern: "needle"} 406 res, err := ss.Search(context.Background(), q, &opts) 407 if err != nil { 408 t.Fatalf("Search(%s): %v", q, err) 409 } 410 411 forbidden := byte(29) 412 for i := range indexBytes { 413 // non-ASCII 414 indexBytes[i] = forbidden 415 } 416 417 for _, f := range res.Files { 418 if bytes.Contains(f.Content, []byte{forbidden}) { 419 t.Errorf("found %d in content %q", forbidden, f.Content) 420 } 421 if bytes.Contains(f.Checksum, []byte{forbidden}) { 422 t.Errorf("found %d in checksum %q", forbidden, f.Checksum) 423 } 424 425 for _, l := range f.LineMatches { 426 if bytes.Contains(l.Line, []byte{forbidden}) { 427 t.Errorf("found %d in line %q", forbidden, l.Line) 428 } 429 } 430 } 431} 432 433func TestShardedSearcher_List(t *testing.T) { 434 repos := []*zoekt.Repository{ 435 { 436 ID: 1234, 437 Name: "repo-a", 438 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 439 RawConfig: map[string]string{"repoid": "1234"}, 440 }, 441 { 442 Name: "repo-b", 443 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 444 }, 445 } 446 447 doc := zoekt.Document{ 448 Name: "foo.go", 449 Content: []byte("bar\nbaz"), 450 Branches: []string{"main", "dev"}, 451 } 452 453 // Test duplicate removal when ListOptions.Minimal is true and false 454 ss := newShardedSearcher(4) 455 ss.replace(map[string]zoekt.Searcher{ 456 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), 457 "2": searcherForTest(t, testIndexBuilder(t, repos[0])), 458 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), 459 "4": searcherForTest(t, testIndexBuilder(t, repos[1])), 460 }) 461 ss.markReady() 462 463 stats := zoekt.RepoStats{ 464 Shards: 2, 465 Documents: 1, 466 IndexBytes: 196, 467 ContentBytes: 13, 468 NewLinesCount: 1, 469 DefaultBranchNewLinesCount: 1, 470 OtherBranchesNewLinesCount: 1, 471 } 472 473 aggStats := stats 474 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works 475 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes. 476 477 for _, tc := range []struct { 478 name string 479 opts *zoekt.ListOptions 480 want *zoekt.RepoList 481 }{ 482 { 483 name: "nil opts", 484 opts: nil, 485 want: &zoekt.RepoList{ 486 Repos: []*zoekt.RepoListEntry{ 487 { 488 Repository: *repos[0], 489 Stats: stats, 490 }, 491 { 492 Repository: *repos[1], 493 Stats: stats, 494 }, 495 }, 496 Stats: aggStats, 497 }, 498 }, 499 { 500 name: "minimal=false", 501 opts: &zoekt.ListOptions{Minimal: false}, 502 want: &zoekt.RepoList{ 503 Repos: []*zoekt.RepoListEntry{ 504 { 505 Repository: *repos[0], 506 Stats: stats, 507 }, 508 { 509 Repository: *repos[1], 510 Stats: stats, 511 }, 512 }, 513 Stats: aggStats, 514 }, 515 }, 516 { 517 name: "minimal=true", 518 opts: &zoekt.ListOptions{Minimal: true}, 519 want: &zoekt.RepoList{ 520 Repos: []*zoekt.RepoListEntry{ 521 { 522 Repository: *repos[1], 523 Stats: stats, 524 }, 525 }, 526 Minimal: map[uint32]*zoekt.MinimalRepoListEntry{ 527 repos[0].ID: { 528 HasSymbols: repos[0].HasSymbols, 529 Branches: repos[0].Branches, 530 }, 531 }, 532 Stats: aggStats, 533 }, 534 }, 535 { 536 name: "field=repos", 537 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 538 want: &zoekt.RepoList{ 539 Repos: []*zoekt.RepoListEntry{ 540 { 541 Repository: *repos[0], 542 Stats: stats, 543 }, 544 { 545 Repository: *repos[1], 546 Stats: stats, 547 }, 548 }, 549 Stats: aggStats, 550 }, 551 }, 552 { 553 name: "field=minimal", 554 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldMinimal}, 555 want: &zoekt.RepoList{ 556 Repos: []*zoekt.RepoListEntry{ 557 { 558 Repository: *repos[1], 559 Stats: stats, 560 }, 561 }, 562 Minimal: map[uint32]*zoekt.MinimalRepoListEntry{ 563 repos[0].ID: { 564 HasSymbols: repos[0].HasSymbols, 565 Branches: repos[0].Branches, 566 }, 567 }, 568 Stats: aggStats, 569 }, 570 }, 571 { 572 name: "field=reposmap", 573 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 574 want: &zoekt.RepoList{ 575 Repos: []*zoekt.RepoListEntry{ 576 { 577 Repository: *repos[1], 578 Stats: stats, 579 }, 580 }, 581 ReposMap: zoekt.ReposMap{ 582 repos[0].ID: { 583 HasSymbols: repos[0].HasSymbols, 584 Branches: repos[0].Branches, 585 }, 586 }, 587 Stats: aggStats, 588 }, 589 }, 590 } { 591 tc := tc 592 t.Run(tc.name, func(t *testing.T) { 593 t.Parallel() 594 595 q := &query.Repo{Regexp: regexp.MustCompile("repo")} 596 597 res, err := ss.List(context.Background(), q, tc.opts) 598 if err != nil { 599 t.Fatalf("List(%v, %s): %v", q, tc.opts, err) 600 } 601 602 sort.Slice(res.Repos, func(i, j int) bool { 603 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name 604 }) 605 606 ignored := []cmp.Option{ 607 cmpopts.EquateEmpty(), 608 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 609 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 610 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 611 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 612 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 613 } 614 615 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" { 616 t.Fatalf("mismatch (-want +got):\n%s", diff) 617 } 618 }) 619 } 620} 621 622func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { 623 b, err := zoekt.NewIndexBuilder(repo) 624 if err != nil { 625 t.Fatalf("NewIndexBuilder: %v", err) 626 } 627 628 for i, d := range docs { 629 if err := b.Add(d); err != nil { 630 t.Fatalf("Add %d: %v", i, err) 631 } 632 } 633 return b 634} 635 636func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { 637 var buf bytes.Buffer 638 if err := b.Write(&buf); err != nil { 639 t.Fatal(err) 640 } 641 f := &memSeeker{buf.Bytes()} 642 643 searcher, err := zoekt.NewSearcher(f) 644 if err != nil { 645 t.Fatalf("NewSearcher: %v", err) 646 } 647 648 return searcher 649} 650 651func reposForTest(n int) (result []*zoekt.Repository) { 652 for i := 0; i < n; i++ { 653 result = append(result, &zoekt.Repository{ 654 ID: uint32(i + 1), 655 Name: fmt.Sprintf("test-repository-%d", i), 656 }) 657 } 658 return result 659} 660 661func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 662 builder := testIndexBuilder(b, r) 663 664 if err := builder.Add(zoekt.Document{ 665 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 666 Content: []byte("needle needle needle haystack"), 667 }); err != nil { 668 b.Fatal(err) 669 } 670 671 for i := 1; i < numFiles; i++ { 672 if err := builder.Add(zoekt.Document{ 673 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 674 Content: []byte("haystack haystack haystack"), 675 }); err != nil { 676 b.Fatal(err) 677 } 678 } 679 680 return searcherForTest(b, builder) 681} 682 683func BenchmarkShardedSearch(b *testing.B) { 684 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0))) 685 686 filesPerRepo := 300 687 repos := reposForTest(3000) 688 var repoSetIDs []uint32 689 690 shards := make(map[string]zoekt.Searcher, len(repos)) 691 for i, r := range repos { 692 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo) 693 if i%2 == 0 { 694 repoSetIDs = append(repoSetIDs, r.ID) 695 } 696 } 697 698 ss.replace(shards) 699 700 ctx := context.Background() 701 opts := &zoekt.SearchOptions{} 702 703 needleSub := &query.Substring{Pattern: "needle"} 704 haystackSub := &query.Substring{Pattern: "haystack"} 705 helloworldSub := &query.Substring{Pattern: "helloworld"} 706 haystackCap, err := query.Parse("hay(s(t))(a)ck") 707 if err != nil { 708 b.Fatal(err) 709 } 710 711 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck") 712 if err != nil { 713 b.Fatal(err) 714 } 715 716 setAnd := func(q query.Q) func() query.Q { 717 return func() query.Q { 718 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q) 719 } 720 } 721 722 search := func(b *testing.B, q query.Q, wantFiles int) { 723 b.Helper() 724 725 res, err := ss.Search(ctx, q, opts) 726 if err != nil { 727 b.Fatalf("Search(%s): %v", q, err) 728 } 729 if have := len(res.Files); have != wantFiles { 730 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles) 731 } 732 } 733 734 benchmarks := []struct { 735 name string 736 q func() query.Q 737 wantFiles int 738 }{ 739 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, 740 {"substring no results", func() query.Q { return helloworldSub }, 0}, 741 {"substring some results", func() query.Q { return needleSub }, len(repos)}, 742 743 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo}, 744 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo}, 745 746 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo}, 747 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)}, 748 {"substring no results and repo set", setAnd(helloworldSub), 0}, 749 } 750 751 for _, bb := range benchmarks { 752 b.Run(bb.name, func(b *testing.B) { 753 b.ReportAllocs() 754 755 for n := 0; n < b.N; n++ { 756 q := bb.q() 757 758 search(b, q, bb.wantFiles) 759 } 760 }) 761 } 762} 763 764func TestRawQuerySearch(t *testing.T) { 765 ss := newShardedSearcher(1) 766 767 var nextShardNum int 768 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { 769 r := &zoekt.Repository{Name: repo} 770 r.RawConfig = rawConfig 771 b := testIndexBuilder(t, r, docs...) 772 shard := searcherForTest(t, b) 773 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 774 nextShardNum++ 775 } 776 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 777 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 778 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 779 780 cases := []struct { 781 pattern string 782 flags query.RawConfig 783 wantRepos []string 784 wantFiles int 785 }{ 786 { 787 pattern: "bas", 788 flags: query.RcOnlyPublic, 789 wantRepos: []string{"public"}, 790 wantFiles: 1, 791 }, 792 { 793 pattern: "foo", 794 flags: query.RcOnlyPublic, 795 wantRepos: []string{"public", "public_fork"}, 796 wantFiles: 2, 797 }, 798 { 799 pattern: "foo", 800 flags: query.RcOnlyPublic | query.RcNoForks, 801 wantRepos: []string{"public"}, 802 wantFiles: 1, 803 }, 804 { 805 pattern: "bar", 806 flags: query.RcOnlyForks, 807 wantRepos: []string{"public_fork"}, 808 wantFiles: 1, 809 }, 810 { 811 pattern: "bas", 812 flags: query.RcNoArchived, 813 wantRepos: []string{"public"}, 814 wantFiles: 1, 815 }, 816 { 817 pattern: "foo", 818 flags: query.RcNoForks, 819 wantRepos: []string{"public", "private_archived"}, 820 wantFiles: 2, 821 }, 822 { 823 pattern: "bas", 824 flags: query.RcOnlyArchived, 825 wantRepos: []string{"private_archived"}, 826 wantFiles: 1, 827 }, 828 { 829 pattern: "foo", 830 flags: query.RcOnlyPrivate, 831 wantRepos: []string{"private_archived"}, 832 wantFiles: 1, 833 }, 834 { 835 pattern: "foo", 836 flags: query.RcOnlyPrivate | query.RcNoArchived, 837 wantRepos: []string{}, 838 wantFiles: 0, 839 }, 840 } 841 for _, c := range cases { 842 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) { 843 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags) 844 845 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 846 if err != nil { 847 t.Fatal(err) 848 } 849 850 if got := len(sr.Files); got != c.wantFiles { 851 t.Fatalf("wanted %d, got %d", c.wantFiles, got) 852 } 853 854 if c.wantFiles == 0 { 855 return 856 } 857 858 gotRepos := make([]string, 0, len(sr.RepoURLs)) 859 for k := range sr.RepoURLs { 860 gotRepos = append(gotRepos, k) 861 } 862 sort.Strings(gotRepos) 863 sort.Strings(c.wantRepos) 864 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" { 865 t.Fatalf("(-want, +got):\n%s", d) 866 } 867 }) 868 } 869} 870 871func TestPrioritySlice(t *testing.T) { 872 p := &prioritySlice{} 873 for step, oper := range []struct { 874 isAppend bool 875 value float64 876 expectedMax float64 877 }{ 878 {true, 1, 1}, 879 {true, 3, 3}, 880 {true, 2, 3}, 881 {false, 1, 3}, 882 {false, 3, 2}, 883 {false, 2, math.Inf(-1)}, 884 } { 885 if oper.isAppend { 886 p.append(oper.value) 887 } else { 888 p.remove(oper.value) 889 } 890 max := p.max() 891 if max != oper.expectedMax { 892 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax) 893 } 894 } 895} 896 897func TestSendByRepository(t *testing.T) { 898 wantStats := zoekt.Stats{} 899 wantStats.ShardsScanned = 1 900 901 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this 902 // test. 903 f := func(n1, n2, n3 uint8) bool { 904 905 sr := createMockSearchResult(n1, n2, n3, wantStats) 906 907 mock := &mockSender{} 908 sendByRepository(sr, &zoekt.SearchOptions{}, mock) 909 910 if diff := cmp.Diff(wantStats, mock.stats); diff != "" { 911 t.Logf("-want,+got\n%s", diff) 912 return false 913 } 914 915 nonZero := 0 916 for _, l := range []uint8{n1, n2, n3} { 917 if l > 0 { 918 nonZero++ 919 } 920 } 921 if l := len(mock.files); l != nonZero { 922 t.Logf("wanted results from %d repositores, got %d", nonZero, l) 923 return false 924 } 925 926 gotTotal := 0 927 for _, fs := range mock.files { 928 gotTotal += len(fs) 929 } 930 wantTotal := int(n1) + int(n2) + int(n3) 931 if gotTotal != wantTotal { 932 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal) 933 return false 934 } 935 936 for _, fs := range mock.files { 937 if len(fs) == 0 { 938 t.Logf("got search result with 0 file matches after split") 939 return false 940 } 941 } 942 return true 943 } 944 945 if err := quick.Check(f, nil); err != nil { 946 t.Error(err) 947 } 948} 949 950type mockSender struct { 951 stats zoekt.Stats 952 files [][]zoekt.FileMatch 953} 954 955func (s *mockSender) Send(sr *zoekt.SearchResult) { 956 s.stats.Add(sr.Stats) 957 if len(sr.Files) == 0 { 958 return 959 } 960 s.files = append(s.files, sr.Files) 961} 962 963func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult { 964 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)} 965 for i, n := range []uint8{n1, n2, n3} { 966 if n == 0 { 967 continue 968 } 969 tmp := mkSearchResult(int(n), uint32(i)) 970 sr.Files = append(sr.Files, tmp.Files...) 971 for k := range tmp.RepoURLs { 972 sr.RepoURLs[k] = "" 973 } 974 } 975 sr.Stats = stats 976 return sr 977} 978 979func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { 980 if n == 0 { 981 return &zoekt.SearchResult{} 982 } 983 fm := make([]zoekt.FileMatch, 0, n) 984 for i := 0; i < n; i++ { 985 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) 986 } 987 988 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}} 989} 990 991func TestFileBasedSearch(t *testing.T) { 992 cases := []struct { 993 name string 994 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 995 }{ 996 {"Search", testShardedSearch}, 997 {"StreamSearch", testShardedStreamSearch}, 998 } 999 1000 c1 := []byte("I love bananas without skin") 1001 // -----------0123456789012345678901234567890123456789 1002 c2 := []byte("In Dutch, ananas means pineapple") 1003 // -----------0123456789012345678901234567890123456789 1004 b := testIndexBuilder(t, nil, 1005 zoekt.Document{Name: "f1", Content: c1}, 1006 zoekt.Document{Name: "f2", Content: c2}, 1007 ) 1008 1009 for _, tt := range cases { 1010 for _, useDocumentRanks := range []bool{false, true} { 1011 t.Run(tt.name, func(t *testing.T) { 1012 matches := tt.testShardedSearch(t, &query.Substring{ 1013 CaseSensitive: false, 1014 Pattern: "ananas", 1015 }, b, useDocumentRanks) 1016 1017 if len(matches) != 2 { 1018 t.Fatalf("got %v, want 2 matches", matches) 1019 } 1020 if matches[0].FileName != "f2" || matches[1].FileName != "f1" { 1021 t.Fatalf("got %v, want matches {f1,f2}", matches) 1022 } 1023 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 { 1024 t.Fatalf("got %#v, want offsets 10,8", matches) 1025 } 1026 }) 1027 } 1028 } 1029} 1030 1031func TestWordBoundaryRanking(t *testing.T) { 1032 cases := []struct { 1033 name string 1034 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1035 }{ 1036 {"Search", testShardedSearch}, 1037 {"StreamSearch", testShardedStreamSearch}, 1038 } 1039 1040 b := testIndexBuilder(t, nil, 1041 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1042 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1043 // -----------------------------------------0123456 789012 34567890 1044 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1045 1046 for _, tt := range cases { 1047 for _, useDocumentRanks := range []bool{false, true} { 1048 t.Run(tt.name, func(t *testing.T) { 1049 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks) 1050 1051 if len(files) != 3 { 1052 t.Fatalf("got %#v, want 3 files", files) 1053 } 1054 1055 file0 := files[0] 1056 if file0.FileName != "f2" || len(file0.LineMatches) != 3 { 1057 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0) 1058 } 1059 1060 if file0.LineMatches[0].LineFragments[0].Offset != 13 { 1061 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0]) 1062 } 1063 if file0.LineMatches[1].LineFragments[0].Offset != 7 { 1064 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1]) 1065 } 1066 }) 1067 } 1068 } 1069} 1070 1071func TestAtomCountScore(t *testing.T) { 1072 cases := []struct { 1073 name string 1074 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1075 }{ 1076 {"Search", testShardedSearch}, 1077 {"StreamSearch", testShardedStreamSearch}, 1078 } 1079 1080 b := testIndexBuilder(t, 1081 &zoekt.Repository{ 1082 Branches: []zoekt.RepositoryBranch{ 1083 {Name: "branches", Version: "v1"}, 1084 {Name: "needle", Version: "v2"}, 1085 }, 1086 }, 1087 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1088 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1089 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1090 1091 for _, tt := range cases { 1092 for _, useDocumentRanks := range []bool{false, true} { 1093 t.Run(tt.name, func(t *testing.T) { 1094 files := tt.testShardedSearch(t, 1095 query.NewOr( 1096 &query.Substring{Pattern: "needle"}, 1097 &query.Substring{Pattern: "needle", FileName: true}, 1098 &query.Branch{Pattern: "needle"}, 1099 ), b, useDocumentRanks) 1100 var got []string 1101 for _, f := range files { 1102 got = append(got, f.FileName) 1103 } 1104 want := []string{"needle-file-branch", "needle-file", "f1"} 1105 if !reflect.DeepEqual(got, want) { 1106 t.Errorf("got %v, want %v", got, want) 1107 } 1108 }) 1109 } 1110 } 1111} 1112 1113func TestUseKeywordScoring(t *testing.T) { 1114 b := testIndexBuilder(t, 1115 &zoekt.Repository{}, 1116 zoekt.Document{Name: "f1", Content: []byte("one two two three")}, 1117 zoekt.Document{Name: "f2", Content: []byte("one two one two")}, 1118 zoekt.Document{Name: "f3", Content: []byte("one three three three")}) 1119 1120 ss := newShardedSearcher(1) 1121 searcher := searcherForTest(t, b) 1122 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1123 1124 q := query.NewOr( 1125 &query.Substring{Pattern: "one"}, 1126 &query.Substring{Pattern: "three"}) 1127 1128 opts := zoekt.SearchOptions{ 1129 UseKeywordScoring: true, 1130 } 1131 1132 results, err := ss.Search(context.Background(), q, &opts) 1133 if err != nil { 1134 t.Fatal(err) 1135 } 1136 1137 var got []string 1138 for _, f := range results.Files { 1139 got = append(got, f.FileName) 1140 } 1141 1142 want := []string{"f3", "f1", "f2"} 1143 if !reflect.DeepEqual(got, want) { 1144 t.Errorf("got %v, want %v", got, want) 1145 } 1146} 1147 1148func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1149 ss := newShardedSearcher(1) 1150 searcher := searcherForTest(t, ib) 1151 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1152 1153 var files []zoekt.FileMatch 1154 sender := stream.SenderFunc(func(result *zoekt.SearchResult) { 1155 files = append(files, result.Files...) 1156 }) 1157 1158 opts := zoekt.SearchOptions{} 1159 if useDocumentRanks { 1160 opts.UseDocumentRanks = true 1161 opts.FlushWallTime = 10 * time.Millisecond 1162 } 1163 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil { 1164 t.Fatal(err) 1165 } 1166 return files 1167} 1168 1169func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1170 ss := newShardedSearcher(1) 1171 searcher := searcherForTest(t, ib) 1172 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1173 1174 opts := zoekt.SearchOptions{} 1175 if useDocumentRanks { 1176 opts.UseDocumentRanks = true 1177 opts.FlushWallTime = 50 * time.Millisecond 1178 } 1179 sres, _ := ss.Search(context.Background(), q, &opts) 1180 return sres.Files 1181} 1182 1183// Ensure we work on empty shard directories. 1184func TestNewDirectorySearcher_empty(t *testing.T) { 1185 ctx := context.Background() 1186 1187 test := func(t *testing.T, ss zoekt.Streamer) { 1188 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1189 if err != nil { 1190 t.Fatal("Search non-nil error", err) 1191 } 1192 1193 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" { 1194 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff) 1195 } 1196 1197 rl, err := ss.List(ctx, &query.Const{Value: true}, nil) 1198 if err != nil { 1199 t.Fatal("List non-nil error", err) 1200 } 1201 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" { 1202 t.Fatalf("List had non empty results (-want, +got):\n%s", diff) 1203 } 1204 } 1205 1206 dir := t.TempDir() 1207 t.Run("blocking", func(t *testing.T) { 1208 ss, err := NewDirectorySearcher(dir) 1209 if err != nil { 1210 t.Fatal(err) 1211 } 1212 t.Cleanup(ss.Close) 1213 1214 // We expect crashes to be empty as soon as NewDirectorySearcher returns 1215 // so we can validate straight away. 1216 test(t, ss) 1217 }) 1218 1219 t.Run("fast", func(t *testing.T) { 1220 ss, err := NewDirectorySearcherFast(dir) 1221 if err != nil { 1222 t.Fatal(err) 1223 } 1224 t.Cleanup(ss.Close) 1225 1226 deadline := testDeadline(t, 10*time.Second) 1227 1228 // Wait for scanning of directory to be done. We should be returning 1229 // non-zero crashes until then. 1230 waitForPredicate(deadline, 10*time.Millisecond, func() bool { 1231 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1232 if err != nil { 1233 t.Fatal(err) 1234 } 1235 return res.Stats.Crashes == 0 1236 }) 1237 1238 test(t, ss) 1239 }) 1240} 1241 1242// testDeadline returns the deadline for t, but ensures it is no longer than 1243// maxTimeout away. 1244func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time { 1245 deadline := time.Now().Add(maxTimeout) 1246 if d, ok := t.Deadline(); ok && d.Before(deadline) { 1247 // give 1s for us to do a final test run 1248 deadline = d.Add(-time.Second) 1249 } 1250 return deadline 1251} 1252 1253func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool { 1254 for time.Now().Before(deadline) { 1255 if pred() { 1256 return true 1257 } 1258 1259 time.Sleep(tick) 1260 } 1261 1262 return pred() 1263}