fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package shards 16 17import ( 18 "bytes" 19 "context" 20 "fmt" 21 "hash/fnv" 22 "log" 23 "math" 24 "os" 25 "reflect" 26 "runtime" 27 "sort" 28 "strconv" 29 "testing" 30 "testing/quick" 31 "time" 32 33 "github.com/RoaringBitmap/roaring" 34 "github.com/google/go-cmp/cmp" 35 "github.com/google/go-cmp/cmp/cmpopts" 36 "github.com/grafana/regexp" 37 38 "github.com/sourcegraph/zoekt" 39 "github.com/sourcegraph/zoekt/query" 40 "github.com/sourcegraph/zoekt/stream" 41) 42 43type crashSearcher struct{} 44 45func (s *crashSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 46 panic("search") 47} 48 49func (s *crashSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 50 panic("list") 51} 52 53func (s *crashSearcher) Stats() (*zoekt.RepoStats, error) { 54 return &zoekt.RepoStats{}, nil 55} 56 57func (s *crashSearcher) Close() {} 58 59func (s *crashSearcher) String() string { return "crashSearcher" } 60 61func TestCrashResilience(t *testing.T) { 62 out := &bytes.Buffer{} 63 log.SetOutput(out) 64 defer log.SetOutput(os.Stderr) 65 ss := newShardedSearcher(2) 66 ss.ranked.Store([]*rankedShard{{Searcher: &crashSearcher{}}}) 67 68 var wantCrashes int 69 test := func(t *testing.T) { 70 q := &query.Substring{Pattern: "hoi"} 71 opts := &zoekt.SearchOptions{} 72 if res, err := ss.Search(context.Background(), q, opts); err != nil { 73 t.Fatalf("Search: %v", err) 74 } else if res.Stats.Crashes != wantCrashes { 75 t.Errorf("got stats %#v, want crashes = %d", res.Stats, wantCrashes) 76 } 77 78 if res, err := ss.List(context.Background(), q, nil); err != nil { 79 t.Fatalf("List: %v", err) 80 } else if res.Crashes != wantCrashes { 81 t.Errorf("got result %#v, want crashes = %d", res, wantCrashes) 82 } 83 } 84 85 // Before we are marked as ready we have one extra crash 86 wantCrashes = 2 87 t.Run("loading", test) 88 89 // After marking as ready we should only have the crashSearcher 90 // contributing. 91 ss.markReady() 92 wantCrashes = 1 93 t.Run("ready", test) 94} 95 96type rankSearcher struct { 97 rank uint16 98 repo *zoekt.Repository 99} 100 101func (s *rankSearcher) Close() { 102} 103 104func (s *rankSearcher) String() string { 105 return "" 106} 107 108func (s *rankSearcher) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (*zoekt.SearchResult, error) { 109 select { 110 case <-ctx.Done(): 111 return &zoekt.SearchResult{}, nil 112 default: 113 } 114 115 // Ugly, but without sleep it's too fast, and we can't 116 // simulate the cutoff. 117 time.Sleep(time.Millisecond) 118 return &zoekt.SearchResult{ 119 Files: []zoekt.FileMatch{ 120 { 121 FileName: fmt.Sprintf("f%d", s.rank), 122 Score: float64(s.rank), 123 }, 124 }, 125 Stats: zoekt.Stats{ 126 MatchCount: 1, 127 }, 128 }, nil 129} 130 131func (s *rankSearcher) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (*zoekt.RepoList, error) { 132 r := zoekt.Repository{} 133 if s.repo != nil { 134 r = *s.repo 135 } 136 r.Rank = s.rank 137 return &zoekt.RepoList{ 138 Repos: []*zoekt.RepoListEntry{ 139 {Repository: r}, 140 }, 141 }, nil 142} 143 144func (s *rankSearcher) Repository() *zoekt.Repository { return s.repo } 145 146func TestOrderByShard(t *testing.T) { 147 ss := newShardedSearcher(1) 148 149 n := 10 * runtime.GOMAXPROCS(0) 150 for i := 0; i < n; i++ { 151 ss.replace(map[string]zoekt.Searcher{ 152 fmt.Sprintf("shard%d", i): &rankSearcher{rank: uint16(i)}, 153 }) 154 } 155 156 if res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}); err != nil { 157 t.Errorf("Search: %v", err) 158 } else if len(res.Files) != n { 159 t.Fatalf("empty options: got %d results, want %d", len(res.Files), n) 160 } 161 162 opts := zoekt.SearchOptions{ 163 TotalMaxMatchCount: 3, 164 } 165 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &opts) 166 if err != nil { 167 t.Errorf("Search: %v", err) 168 } 169 170 if len(res.Files) < opts.TotalMaxMatchCount { 171 t.Errorf("got %d results, want %d", len(res.Files), opts.TotalMaxMatchCount) 172 } 173 if len(res.Files) == n { 174 t.Errorf("got %d results, want < %d", len(res.Files), n) 175 } 176 for i, f := range res.Files { 177 rev := n - 1 - i 178 want := fmt.Sprintf("f%d", rev) 179 got := f.FileName 180 181 if got != want { 182 t.Logf("%d: got %q, want %q", i, got, want) 183 } 184 } 185} 186 187func TestShardedSearcher_Ranking(t *testing.T) { 188 ss := newShardedSearcher(1) 189 190 var nextShardNum int 191 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 192 r := &zoekt.Repository{ID: hash(repo), Name: repo} 193 r.RawConfig = map[string]string{ 194 "public": "1", 195 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 196 } 197 b := testIndexBuilder(t, r, docs...) 198 shard := searcherForTest(t, b) 199 ss.replace(map[string]zoekt.Searcher{ 200 fmt.Sprintf("key-%d", nextShardNum): shard, 201 }) 202 nextShardNum++ 203 } 204 205 addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 206 addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 207 addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 208 addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 209 210 want := []string{ 211 "super-star", 212 "moderately-popular", 213 "weekend-project", 214 "weekend-project-2", 215 } 216 217 var have []string 218 for _, s := range ss.getLoaded().shards { 219 for _, r := range s.repos { 220 have = append(have, r.Name) 221 } 222 } 223 224 if !reflect.DeepEqual(want, have) { 225 t.Fatalf("\nwant: %s\nhave: %s", want, have) 226 } 227} 228 229func TestShardedSearcher_DocumentRanking(t *testing.T) { 230 ss := newShardedSearcher(1) 231 232 var nextShardNum int 233 addShard := func(repo string, priority float64, docs ...zoekt.Document) { 234 r := &zoekt.Repository{ID: hash(repo), Name: repo} 235 r.RawConfig = map[string]string{ 236 "public": "1", 237 "priority": strconv.FormatFloat(priority, 'f', 2, 64), 238 } 239 b := testIndexBuilder(t, r, docs...) 240 shard := searcherForTest(t, b) 241 ss.replace(map[string]zoekt.Searcher{ 242 fmt.Sprintf("key-%d", nextShardNum): shard, 243 }) 244 nextShardNum++ 245 } 246 247 addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 248 addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 249 addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 250 addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 251 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 252 253 // Run a stream search and gather the results 254 var results []*zoekt.SearchResult 255 opts := &zoekt.SearchOptions{ 256 UseDocumentRanks: true, 257 FlushWallTime: 100 * time.Millisecond, 258 } 259 260 err := ss.StreamSearch(context.Background(), &query.Substring{Pattern: "foo"}, opts, 261 stream.SenderFunc(func(event *zoekt.SearchResult) { 262 results = append(results, event) 263 })) 264 if err != nil { 265 t.Fatal(err) 266 } 267 268 // There should always be two stream results, first progress-only, then the file results 269 if len(results) != 2 { 270 t.Fatalf("expected 2 streamed results, but got %d", len(results)) 271 } 272 273 // The ranking should be determined by whether it's an exact word match, 274 // followed by repository priority 275 want := []string{"f4", "f3", "f5", "f2", "f1"} 276 277 files := results[1].Files 278 got := make([]string, len(files)) 279 for i := 0; i < len(files); i++ { 280 got[i] = files[i].FileName 281 } 282 283 if !reflect.DeepEqual(got, want) { 284 t.Errorf("got %v, want %v", got, want) 285 } 286} 287 288func TestFilteringShardsByRepoSetOrBranchesReposOrRepoIDs(t *testing.T) { 289 ss := newShardedSearcher(1) 290 291 // namePrefix is so we can create a repo:foo filter and match the same set 292 // of repos. 293 namePrefix := [3]string{"foo", "bar", "baz"} 294 295 repoSetNames := []string{} 296 repoIDs := []uint32{} 297 n := 10 * runtime.GOMAXPROCS(0) 298 for i := 0; i < n; i++ { 299 shardName := fmt.Sprintf("shard%d", i) 300 repoName := fmt.Sprintf("%s-repository%.3d", namePrefix[i%3], i) 301 repoID := hash(repoName) 302 303 if i%3 == 0 { 304 repoSetNames = append(repoSetNames, repoName) 305 repoIDs = append(repoIDs, repoID) 306 } 307 308 ss.replace(map[string]zoekt.Searcher{ 309 shardName: &rankSearcher{ 310 repo: &zoekt.Repository{ID: repoID, Name: repoName}, 311 rank: uint16(n - i), 312 }, 313 }) 314 } 315 316 res, err := ss.Search(context.Background(), &query.Substring{Pattern: "bla"}, &zoekt.SearchOptions{}) 317 if err != nil { 318 t.Errorf("Search: %v", err) 319 } 320 if len(res.Files) != n { 321 t.Fatalf("no reposet: got %d results, want %d", len(res.Files), n) 322 } 323 324 branchesRepos := &query.BranchesRepos{List: []query.BranchRepos{ 325 {Branch: "HEAD", Repos: roaring.New()}, 326 }} 327 328 for _, name := range repoSetNames { 329 branchesRepos.List[0].Repos.Add(hash(name)) 330 } 331 332 set := query.NewRepoSet(repoSetNames...) 333 sub := &query.Substring{Pattern: "bla"} 334 335 repoIDsQuery := query.NewRepoIDs(repoIDs...) 336 repoQuery := &query.Repo{Regexp: regexp.MustCompile("^foo-.*")} 337 338 queries := []query.Q{ 339 query.NewAnd(set, sub), 340 // Test with the same reposet again 341 query.NewAnd(set, sub), 342 343 query.NewAnd(branchesRepos, sub), 344 // Test with the same repoBranches with IDs again 345 query.NewAnd(branchesRepos, sub), 346 347 query.NewAnd(repoIDsQuery, sub), 348 // Test with the same repoIDs again 349 query.NewAnd(repoIDsQuery, sub), 350 351 query.NewAnd(repoQuery, sub), 352 query.NewAnd(repoQuery, sub), 353 354 // List has queries which are just the reposet atoms. We also test twice. 355 set, 356 set, 357 branchesRepos, 358 branchesRepos, 359 repoIDsQuery, 360 repoIDsQuery, 361 repoQuery, 362 repoQuery, 363 } 364 365 for _, q := range queries { 366 res, err = ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 367 if err != nil { 368 t.Errorf("Search(%s): %v", q, err) 369 } 370 // Note: Assertion is based on fact that `rankSearcher` always returns a 371 // result and using repoSet will half the number of results 372 if len(res.Files) != len(repoSetNames) { 373 t.Fatalf("%s: got %d results, want %d", q, len(res.Files), len(repoSetNames)) 374 } 375 } 376} 377 378func hash(name string) uint32 { 379 h := fnv.New32() 380 h.Write([]byte(name)) 381 return h.Sum32() 382} 383 384type memSeeker struct { 385 data []byte 386} 387 388func (s *memSeeker) Name() string { 389 return "memseeker" 390} 391 392func (s *memSeeker) Close() {} 393func (s *memSeeker) Read(off, sz uint32) ([]byte, error) { 394 return s.data[off : off+sz], nil 395} 396 397func (s *memSeeker) Size() (uint32, error) { 398 return uint32(len(s.data)), nil 399} 400 401func TestUnloadIndex(t *testing.T) { 402 b := testIndexBuilder(t, nil, zoekt.Document{ 403 Name: "filename", 404 Content: []byte("needle needle needle"), 405 }) 406 407 var buf bytes.Buffer 408 if err := b.Write(&buf); err != nil { 409 t.Fatal(err) 410 } 411 indexBytes := buf.Bytes() 412 indexFile := &memSeeker{indexBytes} 413 searcher, err := zoekt.NewSearcher(indexFile) 414 if err != nil { 415 t.Fatalf("NewSearcher: %v", err) 416 } 417 418 ss := newShardedSearcher(2) 419 ss.replace(map[string]zoekt.Searcher{"key": searcher}) 420 421 var opts zoekt.SearchOptions 422 q := &query.Substring{Pattern: "needle"} 423 res, err := ss.Search(context.Background(), q, &opts) 424 if err != nil { 425 t.Fatalf("Search(%s): %v", q, err) 426 } 427 428 forbidden := byte(29) 429 for i := range indexBytes { 430 // non-ASCII 431 indexBytes[i] = forbidden 432 } 433 434 for _, f := range res.Files { 435 if bytes.Contains(f.Content, []byte{forbidden}) { 436 t.Errorf("found %d in content %q", forbidden, f.Content) 437 } 438 if bytes.Contains(f.Checksum, []byte{forbidden}) { 439 t.Errorf("found %d in checksum %q", forbidden, f.Checksum) 440 } 441 442 for _, l := range f.LineMatches { 443 if bytes.Contains(l.Line, []byte{forbidden}) { 444 t.Errorf("found %d in line %q", forbidden, l.Line) 445 } 446 } 447 } 448} 449 450func TestShardedSearcher_List(t *testing.T) { 451 repos := []*zoekt.Repository{ 452 { 453 ID: 1234, 454 Name: "repo-a", 455 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 456 RawConfig: map[string]string{"repoid": "1234"}, 457 }, 458 { 459 Name: "repo-b", 460 Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 461 }, 462 } 463 464 doc := zoekt.Document{ 465 Name: "foo.go", 466 Content: []byte("bar\nbaz"), 467 Branches: []string{"main", "dev"}, 468 } 469 470 // Test duplicate removal when ListOptions.Minimal is true and false 471 ss := newShardedSearcher(4) 472 ss.replace(map[string]zoekt.Searcher{ 473 "1": searcherForTest(t, testIndexBuilder(t, repos[0], doc)), 474 "2": searcherForTest(t, testIndexBuilder(t, repos[0])), 475 "3": searcherForTest(t, testIndexBuilder(t, repos[1], doc)), 476 "4": searcherForTest(t, testIndexBuilder(t, repos[1])), 477 }) 478 ss.markReady() 479 480 stats := zoekt.RepoStats{ 481 Shards: 2, 482 Documents: 1, 483 IndexBytes: 196, 484 ContentBytes: 13, 485 NewLinesCount: 1, 486 DefaultBranchNewLinesCount: 1, 487 OtherBranchesNewLinesCount: 1, 488 } 489 490 aggStats := stats 491 aggStats.Add(&aggStats) // since both repos have the exact same stats, this works 492 aggStats.Repos = 2 // Add doesn't populate Repos, this is done in Shards based on the response sizes. 493 494 for _, tc := range []struct { 495 name string 496 opts *zoekt.ListOptions 497 want *zoekt.RepoList 498 }{ 499 { 500 name: "nil opts", 501 opts: nil, 502 want: &zoekt.RepoList{ 503 Repos: []*zoekt.RepoListEntry{ 504 { 505 Repository: *repos[0], 506 Stats: stats, 507 }, 508 { 509 Repository: *repos[1], 510 Stats: stats, 511 }, 512 }, 513 Stats: aggStats, 514 }, 515 }, 516 { 517 name: "default", 518 opts: &zoekt.ListOptions{}, 519 want: &zoekt.RepoList{ 520 Repos: []*zoekt.RepoListEntry{ 521 { 522 Repository: *repos[0], 523 Stats: stats, 524 }, 525 { 526 Repository: *repos[1], 527 Stats: stats, 528 }, 529 }, 530 Stats: aggStats, 531 }, 532 }, 533 { 534 name: "field=repos", 535 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldRepos}, 536 want: &zoekt.RepoList{ 537 Repos: []*zoekt.RepoListEntry{ 538 { 539 Repository: *repos[0], 540 Stats: stats, 541 }, 542 { 543 Repository: *repos[1], 544 Stats: stats, 545 }, 546 }, 547 Stats: aggStats, 548 }, 549 }, 550 { 551 name: "field=reposmap", 552 opts: &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}, 553 want: &zoekt.RepoList{ 554 Repos: []*zoekt.RepoListEntry{ 555 { 556 Repository: *repos[1], 557 Stats: stats, 558 }, 559 }, 560 ReposMap: zoekt.ReposMap{ 561 repos[0].ID: { 562 HasSymbols: repos[0].HasSymbols, 563 Branches: repos[0].Branches, 564 }, 565 }, 566 Stats: aggStats, 567 }, 568 }, 569 } { 570 tc := tc 571 t.Run(tc.name, func(t *testing.T) { 572 t.Parallel() 573 574 q := &query.Repo{Regexp: regexp.MustCompile("repo")} 575 576 res, err := ss.List(context.Background(), q, tc.opts) 577 if err != nil { 578 t.Fatalf("List(%v, %s): %v", q, tc.opts, err) 579 } 580 581 sort.Slice(res.Repos, func(i, j int) bool { 582 return res.Repos[i].Repository.Name < res.Repos[j].Repository.Name 583 }) 584 585 ignored := []cmp.Option{ 586 cmpopts.EquateEmpty(), 587 cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 588 cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 589 cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 590 cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 591 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 592 } 593 594 if diff := cmp.Diff(tc.want, res, ignored...); diff != "" { 595 t.Fatalf("mismatch (-want +got):\n%s", diff) 596 } 597 }) 598 } 599} 600 601func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { 602 b, err := zoekt.NewIndexBuilder(repo) 603 if err != nil { 604 t.Fatalf("NewIndexBuilder: %v", err) 605 } 606 607 for i, d := range docs { 608 if err := b.Add(d); err != nil { 609 t.Fatalf("Add %d: %v", i, err) 610 } 611 } 612 return b 613} 614 615func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { 616 var buf bytes.Buffer 617 if err := b.Write(&buf); err != nil { 618 t.Fatal(err) 619 } 620 f := &memSeeker{buf.Bytes()} 621 622 searcher, err := zoekt.NewSearcher(f) 623 if err != nil { 624 t.Fatalf("NewSearcher: %v", err) 625 } 626 627 return searcher 628} 629 630func reposForTest(n int) (result []*zoekt.Repository) { 631 for i := 0; i < n; i++ { 632 result = append(result, &zoekt.Repository{ 633 ID: uint32(i + 1), 634 Name: fmt.Sprintf("test-repository-%d", i), 635 }) 636 } 637 return result 638} 639 640func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 641 builder := testIndexBuilder(b, r) 642 643 if err := builder.Add(zoekt.Document{ 644 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 645 Content: []byte("needle needle needle haystack"), 646 }); err != nil { 647 b.Fatal(err) 648 } 649 650 for i := 1; i < numFiles; i++ { 651 if err := builder.Add(zoekt.Document{ 652 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 653 Content: []byte("haystack haystack haystack"), 654 }); err != nil { 655 b.Fatal(err) 656 } 657 } 658 659 return searcherForTest(b, builder) 660} 661 662func BenchmarkShardedSearch(b *testing.B) { 663 ss := newShardedSearcher(int64(runtime.GOMAXPROCS(0))) 664 665 filesPerRepo := 300 666 repos := reposForTest(3000) 667 var repoSetIDs []uint32 668 669 shards := make(map[string]zoekt.Searcher, len(repos)) 670 for i, r := range repos { 671 shards[r.Name] = testSearcherForRepo(b, r, filesPerRepo) 672 if i%2 == 0 { 673 repoSetIDs = append(repoSetIDs, r.ID) 674 } 675 } 676 677 ss.replace(shards) 678 679 ctx := context.Background() 680 opts := &zoekt.SearchOptions{} 681 682 needleSub := &query.Substring{Pattern: "needle"} 683 haystackSub := &query.Substring{Pattern: "haystack"} 684 helloworldSub := &query.Substring{Pattern: "helloworld"} 685 haystackCap, err := query.Parse("hay(s(t))(a)ck") 686 if err != nil { 687 b.Fatal(err) 688 } 689 690 haystackNonCap, err := query.Parse("hay(?:s(?:t))(?:a)ck") 691 if err != nil { 692 b.Fatal(err) 693 } 694 695 setAnd := func(q query.Q) func() query.Q { 696 return func() query.Q { 697 return query.NewAnd(query.NewSingleBranchesRepos("head", repoSetIDs...), q) 698 } 699 } 700 701 search := func(b *testing.B, q query.Q, wantFiles int) { 702 b.Helper() 703 704 res, err := ss.Search(ctx, q, opts) 705 if err != nil { 706 b.Fatalf("Search(%s): %v", q, err) 707 } 708 if have := len(res.Files); have != wantFiles { 709 b.Fatalf("wrong number of file results. have=%d, want=%d", have, wantFiles) 710 } 711 } 712 713 benchmarks := []struct { 714 name string 715 q func() query.Q 716 wantFiles int 717 }{ 718 {"substring all results", func() query.Q { return haystackSub }, len(repos) * filesPerRepo}, 719 {"substring no results", func() query.Q { return helloworldSub }, 0}, 720 {"substring some results", func() query.Q { return needleSub }, len(repos)}, 721 722 {"regexp all results capture", func() query.Q { return haystackCap }, len(repos) * filesPerRepo}, 723 {"regexp all results non-capture", func() query.Q { return haystackNonCap }, len(repos) * filesPerRepo}, 724 725 {"substring all results and repo set", setAnd(haystackSub), len(repoSetIDs) * filesPerRepo}, 726 {"substring some results and repo set", setAnd(needleSub), len(repoSetIDs)}, 727 {"substring no results and repo set", setAnd(helloworldSub), 0}, 728 } 729 730 for _, bb := range benchmarks { 731 b.Run(bb.name, func(b *testing.B) { 732 b.ReportAllocs() 733 734 for n := 0; n < b.N; n++ { 735 q := bb.q() 736 737 search(b, q, bb.wantFiles) 738 } 739 }) 740 } 741} 742 743func TestRawQuerySearch(t *testing.T) { 744 ss := newShardedSearcher(1) 745 746 var nextShardNum int 747 addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { 748 r := &zoekt.Repository{Name: repo} 749 r.RawConfig = rawConfig 750 b := testIndexBuilder(t, r, docs...) 751 shard := searcherForTest(t, b) 752 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 753 nextShardNum++ 754 } 755 addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 756 addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 757 addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 758 759 cases := []struct { 760 pattern string 761 flags query.RawConfig 762 wantRepos []string 763 wantFiles int 764 }{ 765 { 766 pattern: "bas", 767 flags: query.RcOnlyPublic, 768 wantRepos: []string{"public"}, 769 wantFiles: 1, 770 }, 771 { 772 pattern: "foo", 773 flags: query.RcOnlyPublic, 774 wantRepos: []string{"public", "public_fork"}, 775 wantFiles: 2, 776 }, 777 { 778 pattern: "foo", 779 flags: query.RcOnlyPublic | query.RcNoForks, 780 wantRepos: []string{"public"}, 781 wantFiles: 1, 782 }, 783 { 784 pattern: "bar", 785 flags: query.RcOnlyForks, 786 wantRepos: []string{"public_fork"}, 787 wantFiles: 1, 788 }, 789 { 790 pattern: "bas", 791 flags: query.RcNoArchived, 792 wantRepos: []string{"public"}, 793 wantFiles: 1, 794 }, 795 { 796 pattern: "foo", 797 flags: query.RcNoForks, 798 wantRepos: []string{"public", "private_archived"}, 799 wantFiles: 2, 800 }, 801 { 802 pattern: "bas", 803 flags: query.RcOnlyArchived, 804 wantRepos: []string{"private_archived"}, 805 wantFiles: 1, 806 }, 807 { 808 pattern: "foo", 809 flags: query.RcOnlyPrivate, 810 wantRepos: []string{"private_archived"}, 811 wantFiles: 1, 812 }, 813 { 814 pattern: "foo", 815 flags: query.RcOnlyPrivate | query.RcNoArchived, 816 wantRepos: []string{}, 817 wantFiles: 0, 818 }, 819 } 820 for _, c := range cases { 821 t.Run(fmt.Sprintf("pattern:%s", c.pattern), func(t *testing.T) { 822 q := query.NewAnd(&query.Substring{Pattern: c.pattern}, c.flags) 823 824 sr, err := ss.Search(context.Background(), q, &zoekt.SearchOptions{}) 825 if err != nil { 826 t.Fatal(err) 827 } 828 829 if got := len(sr.Files); got != c.wantFiles { 830 t.Fatalf("wanted %d, got %d", c.wantFiles, got) 831 } 832 833 if c.wantFiles == 0 { 834 return 835 } 836 837 gotRepos := make([]string, 0, len(sr.RepoURLs)) 838 for k := range sr.RepoURLs { 839 gotRepos = append(gotRepos, k) 840 } 841 sort.Strings(gotRepos) 842 sort.Strings(c.wantRepos) 843 if d := cmp.Diff(c.wantRepos, gotRepos); d != "" { 844 t.Fatalf("(-want, +got):\n%s", d) 845 } 846 }) 847 } 848} 849 850func TestPrioritySlice(t *testing.T) { 851 p := &prioritySlice{} 852 for step, oper := range []struct { 853 isAppend bool 854 value float64 855 expectedMax float64 856 }{ 857 {true, 1, 1}, 858 {true, 3, 3}, 859 {true, 2, 3}, 860 {false, 1, 3}, 861 {false, 3, 2}, 862 {false, 2, math.Inf(-1)}, 863 } { 864 if oper.isAppend { 865 p.append(oper.value) 866 } else { 867 p.remove(oper.value) 868 } 869 max := p.max() 870 if max != oper.expectedMax { 871 t.Errorf("%d: got %f, want %f", step, max, oper.expectedMax) 872 } 873 } 874} 875 876func TestSendByRepository(t *testing.T) { 877 wantStats := zoekt.Stats{} 878 wantStats.ShardsScanned = 1 879 880 // n1, n2, n3 are the number of file matches for each of the 3 repositories in this 881 // test. 882 f := func(n1, n2, n3 uint8) bool { 883 sr := createMockSearchResult(n1, n2, n3, wantStats) 884 885 mock := &mockSender{} 886 sendByRepository(sr, &zoekt.SearchOptions{}, mock) 887 888 if diff := cmp.Diff(wantStats, mock.stats); diff != "" { 889 t.Logf("-want,+got\n%s", diff) 890 return false 891 } 892 893 nonZero := 0 894 for _, l := range []uint8{n1, n2, n3} { 895 if l > 0 { 896 nonZero++ 897 } 898 } 899 if l := len(mock.files); l != nonZero { 900 t.Logf("wanted results from %d repositores, got %d", nonZero, l) 901 return false 902 } 903 904 gotTotal := 0 905 for _, fs := range mock.files { 906 gotTotal += len(fs) 907 } 908 wantTotal := int(n1) + int(n2) + int(n3) 909 if gotTotal != wantTotal { 910 t.Logf("wanted %d file matches, got %d", wantTotal, gotTotal) 911 return false 912 } 913 914 for _, fs := range mock.files { 915 if len(fs) == 0 { 916 t.Logf("got search result with 0 file matches after split") 917 return false 918 } 919 } 920 return true 921 } 922 923 if err := quick.Check(f, nil); err != nil { 924 t.Error(err) 925 } 926} 927 928type mockSender struct { 929 stats zoekt.Stats 930 files [][]zoekt.FileMatch 931} 932 933func (s *mockSender) Send(sr *zoekt.SearchResult) { 934 s.stats.Add(sr.Stats) 935 if len(sr.Files) == 0 { 936 return 937 } 938 s.files = append(s.files, sr.Files) 939} 940 941func createMockSearchResult(n1, n2, n3 uint8, stats zoekt.Stats) *zoekt.SearchResult { 942 sr := &zoekt.SearchResult{RepoURLs: make(map[string]string)} 943 for i, n := range []uint8{n1, n2, n3} { 944 if n == 0 { 945 continue 946 } 947 tmp := mkSearchResult(int(n), uint32(i)) 948 sr.Files = append(sr.Files, tmp.Files...) 949 for k := range tmp.RepoURLs { 950 sr.RepoURLs[k] = "" 951 } 952 } 953 sr.Stats = stats 954 return sr 955} 956 957func mkSearchResult(n int, repoID uint32) *zoekt.SearchResult { 958 if n == 0 { 959 return &zoekt.SearchResult{} 960 } 961 fm := make([]zoekt.FileMatch, 0, n) 962 for i := 0; i < n; i++ { 963 fm = append(fm, zoekt.FileMatch{Repository: fmt.Sprintf("repo%d", repoID), RepositoryID: repoID}) 964 } 965 966 return &zoekt.SearchResult{Files: fm, RepoURLs: map[string]string{fmt.Sprintf("repo%d", repoID): ""}} 967} 968 969func TestFileBasedSearch(t *testing.T) { 970 cases := []struct { 971 name string 972 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 973 }{ 974 {"Search", testShardedSearch}, 975 {"StreamSearch", testShardedStreamSearch}, 976 } 977 978 c1 := []byte("I love bananas without skin") 979 // -----------0123456789012345678901234567890123456789 980 c2 := []byte("In Dutch, ananas means pineapple") 981 // -----------0123456789012345678901234567890123456789 982 b := testIndexBuilder(t, nil, 983 zoekt.Document{Name: "f1", Content: c1}, 984 zoekt.Document{Name: "f2", Content: c2}, 985 ) 986 987 for _, tt := range cases { 988 for _, useDocumentRanks := range []bool{false, true} { 989 t.Run(tt.name, func(t *testing.T) { 990 matches := tt.testShardedSearch(t, &query.Substring{ 991 CaseSensitive: false, 992 Pattern: "ananas", 993 }, b, useDocumentRanks) 994 995 if len(matches) != 2 { 996 t.Fatalf("got %v, want 2 matches", matches) 997 } 998 if matches[0].FileName != "f2" || matches[1].FileName != "f1" { 999 t.Fatalf("got %v, want matches {f1,f2}", matches) 1000 } 1001 if matches[0].LineMatches[0].LineFragments[0].Offset != 10 || matches[1].LineMatches[0].LineFragments[0].Offset != 8 { 1002 t.Fatalf("got %#v, want offsets 10,8", matches) 1003 } 1004 }) 1005 } 1006 } 1007} 1008 1009func TestWordBoundaryRanking(t *testing.T) { 1010 cases := []struct { 1011 name string 1012 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1013 }{ 1014 {"Search", testShardedSearch}, 1015 {"StreamSearch", testShardedStreamSearch}, 1016 } 1017 1018 b := testIndexBuilder(t, nil, 1019 zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1020 zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1021 // -----------------------------------------0123456 789012 34567890 1022 zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1023 1024 for _, tt := range cases { 1025 for _, useDocumentRanks := range []bool{false, true} { 1026 t.Run(tt.name, func(t *testing.T) { 1027 files := tt.testShardedSearch(t, &query.Substring{Pattern: "byte"}, b, useDocumentRanks) 1028 1029 if len(files) != 3 { 1030 t.Fatalf("got %#v, want 3 files", files) 1031 } 1032 1033 file0 := files[0] 1034 if file0.FileName != "f2" || len(file0.LineMatches) != 3 { 1035 t.Fatalf("got file %s, num matches %d (%#v), want 3 matches in file f2", file0.FileName, len(file0.LineMatches), file0) 1036 } 1037 1038 if file0.LineMatches[0].LineFragments[0].Offset != 13 { 1039 t.Fatalf("got first match %#v, want full word match", files[0].LineMatches[0]) 1040 } 1041 if file0.LineMatches[1].LineFragments[0].Offset != 7 { 1042 t.Fatalf("got second match %#v, want partial word match", files[0].LineMatches[1]) 1043 } 1044 }) 1045 } 1046 } 1047} 1048 1049func TestAtomCountScore(t *testing.T) { 1050 cases := []struct { 1051 name string 1052 testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1053 }{ 1054 {"Search", testShardedSearch}, 1055 {"StreamSearch", testShardedStreamSearch}, 1056 } 1057 1058 b := testIndexBuilder(t, 1059 &zoekt.Repository{ 1060 Branches: []zoekt.RepositoryBranch{ 1061 {Name: "branches", Version: "v1"}, 1062 {Name: "needle", Version: "v2"}, 1063 }, 1064 }, 1065 zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1066 zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1067 zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1068 1069 for _, tt := range cases { 1070 for _, useDocumentRanks := range []bool{false, true} { 1071 t.Run(tt.name, func(t *testing.T) { 1072 files := tt.testShardedSearch(t, 1073 query.NewOr( 1074 &query.Substring{Pattern: "needle"}, 1075 &query.Substring{Pattern: "needle", FileName: true}, 1076 &query.Branch{Pattern: "needle"}, 1077 ), b, useDocumentRanks) 1078 var got []string 1079 for _, f := range files { 1080 got = append(got, f.FileName) 1081 } 1082 want := []string{"needle-file-branch", "needle-file", "f1"} 1083 if !reflect.DeepEqual(got, want) { 1084 t.Errorf("got %v, want %v", got, want) 1085 } 1086 }) 1087 } 1088 } 1089} 1090 1091func TestUseKeywordScoring(t *testing.T) { 1092 b := testIndexBuilder(t, 1093 &zoekt.Repository{}, 1094 zoekt.Document{Name: "f1", Content: []byte("one two two three")}, 1095 zoekt.Document{Name: "f2", Content: []byte("one two one two")}, 1096 zoekt.Document{Name: "f3", Content: []byte("one three three three")}) 1097 1098 ss := newShardedSearcher(1) 1099 searcher := searcherForTest(t, b) 1100 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1101 1102 q := query.NewOr( 1103 &query.Substring{Pattern: "one"}, 1104 &query.Substring{Pattern: "three"}) 1105 1106 opts := zoekt.SearchOptions{ 1107 UseKeywordScoring: true, 1108 } 1109 1110 results, err := ss.Search(context.Background(), q, &opts) 1111 if err != nil { 1112 t.Fatal(err) 1113 } 1114 1115 var got []string 1116 for _, f := range results.Files { 1117 got = append(got, f.FileName) 1118 } 1119 1120 want := []string{"f3", "f1", "f2"} 1121 if !reflect.DeepEqual(got, want) { 1122 t.Errorf("got %v, want %v", got, want) 1123 } 1124} 1125 1126func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1127 ss := newShardedSearcher(1) 1128 searcher := searcherForTest(t, ib) 1129 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1130 1131 var files []zoekt.FileMatch 1132 sender := stream.SenderFunc(func(result *zoekt.SearchResult) { 1133 files = append(files, result.Files...) 1134 }) 1135 1136 opts := zoekt.SearchOptions{} 1137 if useDocumentRanks { 1138 opts.UseDocumentRanks = true 1139 opts.FlushWallTime = 10 * time.Millisecond 1140 } 1141 if err := ss.StreamSearch(context.Background(), q, &opts, sender); err != nil { 1142 t.Fatal(err) 1143 } 1144 return files 1145} 1146 1147func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1148 ss := newShardedSearcher(1) 1149 searcher := searcherForTest(t, ib) 1150 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) 1151 1152 opts := zoekt.SearchOptions{} 1153 if useDocumentRanks { 1154 opts.UseDocumentRanks = true 1155 opts.FlushWallTime = 50 * time.Millisecond 1156 } 1157 sres, _ := ss.Search(context.Background(), q, &opts) 1158 return sres.Files 1159} 1160 1161// Ensure we work on empty shard directories. 1162func TestNewDirectorySearcher_empty(t *testing.T) { 1163 ctx := context.Background() 1164 1165 test := func(t *testing.T, ss zoekt.Streamer) { 1166 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1167 if err != nil { 1168 t.Fatal("Search non-nil error", err) 1169 } 1170 1171 if diff := cmp.Diff(&zoekt.SearchResult{}, res, cmpopts.IgnoreFields(zoekt.Stats{}, "Duration", "Wait"), cmpopts.EquateEmpty()); diff != "" { 1172 t.Fatalf("Search had non empty results (-want, +got):\n%s", diff) 1173 } 1174 1175 rl, err := ss.List(ctx, &query.Const{Value: true}, nil) 1176 if err != nil { 1177 t.Fatal("List non-nil error", err) 1178 } 1179 if diff := cmp.Diff(&zoekt.RepoList{}, rl, cmpopts.EquateEmpty()); diff != "" { 1180 t.Fatalf("List had non empty results (-want, +got):\n%s", diff) 1181 } 1182 } 1183 1184 dir := t.TempDir() 1185 t.Run("blocking", func(t *testing.T) { 1186 ss, err := NewDirectorySearcher(dir) 1187 if err != nil { 1188 t.Fatal(err) 1189 } 1190 t.Cleanup(ss.Close) 1191 1192 // We expect crashes to be empty as soon as NewDirectorySearcher returns 1193 // so we can validate straight away. 1194 test(t, ss) 1195 }) 1196 1197 t.Run("fast", func(t *testing.T) { 1198 ss, err := NewDirectorySearcherFast(dir) 1199 if err != nil { 1200 t.Fatal(err) 1201 } 1202 t.Cleanup(ss.Close) 1203 1204 deadline := testDeadline(t, 10*time.Second) 1205 1206 // Wait for scanning of directory to be done. We should be returning 1207 // non-zero crashes until then. 1208 waitForPredicate(deadline, 10*time.Millisecond, func() bool { 1209 res, err := ss.Search(ctx, &query.Const{Value: true}, nil) 1210 if err != nil { 1211 t.Fatal(err) 1212 } 1213 return res.Stats.Crashes == 0 1214 }) 1215 1216 test(t, ss) 1217 }) 1218} 1219 1220// testDeadline returns the deadline for t, but ensures it is no longer than 1221// maxTimeout away. 1222func testDeadline(t *testing.T, maxTimeout time.Duration) time.Time { 1223 deadline := time.Now().Add(maxTimeout) 1224 if d, ok := t.Deadline(); ok && d.Before(deadline) { 1225 // give 1s for us to do a final test run 1226 deadline = d.Add(-time.Second) 1227 } 1228 return deadline 1229} 1230 1231func waitForPredicate(deadline time.Time, tick time.Duration, pred func() bool) bool { 1232 for time.Now().Before(deadline) { 1233 if pred() { 1234 return true 1235 } 1236 1237 time.Sleep(tick) 1238 } 1239 1240 return pred() 1241}