fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 30 kB View raw
1package index 2 3import ( 4 "errors" 5 "flag" 6 "fmt" 7 "io" 8 "log" 9 "os" 10 "path/filepath" 11 "reflect" 12 "strconv" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 "github.com/google/go-cmp/cmp/cmpopts" 19 "github.com/prometheus/client_golang/prometheus/testutil" 20 "github.com/stretchr/testify/require" 21 22 "github.com/sourcegraph/zoekt" 23) 24 25var update = flag.Bool("update", false, "update golden file") 26 27// ensure we don't regress on how we build v16 28func TestBuildv16(t *testing.T) { 29 dir := t.TempDir() 30 31 opts := Options{ 32 IndexDir: dir, 33 RepositoryDescription: zoekt.Repository{ 34 Name: "repo", 35 Source: "./testdata/repo/", 36 Metadata: map[string]string{"foo": "bar"}, 37 }, 38 DisableCTags: true, 39 } 40 opts.SetDefaults() 41 42 b, err := NewBuilder(opts) 43 if err != nil { 44 t.Fatal(err) 45 } 46 47 for _, p := range []string{"main.go"} { 48 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p)) 49 if err != nil { 50 t.Fatal(err) 51 } 52 if err := b.AddFile(p, blob); err != nil { 53 t.Fatal(err) 54 } 55 } 56 57 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt") 58 59 // fields indexTime and id depend on time. For this test, we copy the fields from 60 // the old shard. 61 _, wantMetadata, err := ReadMetadataPath(wantP) 62 if err != nil { 63 t.Fatal(err) 64 } 65 b.indexTime = wantMetadata.IndexTime 66 b.id = wantMetadata.ID 67 68 if err := b.Finish(); err != nil { 69 t.Fatal(err) 70 } 71 72 gotP := filepath.Join(dir, "repo_v16.00000.zoekt") 73 74 if *update { 75 data, err := os.ReadFile(gotP) 76 if err != nil { 77 t.Fatal(err) 78 } 79 err = os.WriteFile(wantP, data, 0o644) 80 if err != nil { 81 t.Fatal(err) 82 } 83 return 84 } 85 86 got, err := os.ReadFile(gotP) 87 if err != nil { 88 t.Fatal(err) 89 } 90 want, err := os.ReadFile(wantP) 91 if err != nil { 92 t.Fatal(err) 93 } 94 95 if d := cmp.Diff(want, got); d != "" { 96 t.Errorf("mismatch (-want +got):\n%s", d) 97 } 98} 99 100func TestFlags(t *testing.T) { 101 cases := []struct { 102 args []string 103 want Options 104 }{{ 105 // Defaults 106 args: []string{}, 107 want: Options{}, 108 }, { 109 args: []string{"-index", "/tmp"}, 110 want: Options{ 111 IndexDir: "/tmp", 112 }, 113 }, { 114 // single large file pattern 115 args: []string{"-large_file", "*.md"}, 116 want: Options{ 117 LargeFiles: []string{"*.md"}, 118 }, 119 }, { 120 // multiple large file pattern 121 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"}, 122 want: Options{ 123 LargeFiles: []string{"*.md", "*.yaml"}, 124 }, 125 }, { 126 // multiple large file pattern with negated pattern 127 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"}, 128 want: Options{ 129 LargeFiles: []string{"*.md", "!*.yaml"}, 130 }, 131 }, { 132 // multiple large file pattern with escaped character 133 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"}, 134 want: Options{ 135 LargeFiles: []string{"*.md", "\\!*.yaml"}, 136 }, 137 }} 138 139 ignored := []cmp.Option{ 140 // depends on $PATH setting. 141 cmpopts.IgnoreFields(Options{}, "CTagsPath"), 142 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"), 143 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"), 144 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 145 } 146 147 for _, c := range cases { 148 c.want.SetDefaults() 149 // depends on $PATH setting. 150 c.want.CTagsPath = "" 151 152 got := Options{} 153 fs := flag.NewFlagSet("", flag.ContinueOnError) 154 got.Flags(fs) 155 if err := fs.Parse(c.args); err != nil { 156 t.Errorf("failed to parse args %v: %v", c.args, err) 157 } else if d := cmp.Diff(c.want, got, ignored...); d != "" { 158 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d) 159 } 160 } 161} 162 163func TestIncrementalSkipIndexing(t *testing.T) { 164 cases := []struct { 165 name string 166 want bool 167 opts Options 168 }{{ 169 name: "v17-noop", 170 want: true, 171 opts: Options{ 172 RepositoryDescription: zoekt.Repository{ 173 Name: "repo17", 174 }, 175 SizeMax: 2097152, 176 DisableCTags: true, 177 }, 178 }, { 179 name: "v16-noop", 180 want: true, 181 opts: Options{ 182 RepositoryDescription: zoekt.Repository{ 183 Name: "repo", 184 }, 185 SizeMax: 2097152, 186 DisableCTags: true, 187 }, 188 }, { 189 name: "v17-id", 190 want: false, 191 opts: Options{ 192 RepositoryDescription: zoekt.Repository{ 193 Name: "repo17", 194 RawConfig: map[string]string{ 195 "repoid": "123", 196 }, 197 }, 198 SizeMax: 2097152, 199 DisableCTags: true, 200 }, 201 }, { 202 name: "doesnotexist", 203 want: false, 204 opts: Options{ 205 RepositoryDescription: zoekt.Repository{ 206 Name: "doesnotexist", 207 }, 208 SizeMax: 2097152, 209 DisableCTags: true, 210 }, 211 }} 212 213 for _, tc := range cases { 214 t.Run(tc.name, func(t *testing.T) { 215 tc.opts.IndexDir = "../testdata/shards" 216 t.Log(tc.opts.IndexState()) 217 got := tc.opts.IncrementalSkipIndexing() 218 if got != tc.want { 219 t.Fatalf("want %v got %v", tc.want, got) 220 } 221 }) 222 } 223} 224 225func TestMain(m *testing.M) { 226 flag.Parse() 227 if !testing.Verbose() { 228 log.SetOutput(io.Discard) 229 } 230 os.Exit(m.Run()) 231} 232 233func TestDontCountContentOfSkippedFiles(t *testing.T) { 234 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{ 235 Name: "foo", 236 }}) 237 if err != nil { 238 t.Fatal(err) 239 } 240 241 // content with at least 100 bytes 242 binary := append([]byte("abc def \x00"), make([]byte, 100)...) 243 err = b.Add(Document{ 244 Name: "f1", 245 Content: binary, 246 }) 247 if err != nil { 248 t.Fatal(err) 249 } 250 if len(b.todo) != 1 || b.todo[0].SkipReason == SkipReasonNone { 251 t.Fatalf("document should have been skipped") 252 } 253 if b.todo[0].Content != nil { 254 t.Fatalf("document content should be empty") 255 } 256 if b.size >= 100 { 257 t.Fatalf("content of skipped documents should not count towards shard size thresold") 258 } 259} 260 261func TestPartialSuccess(t *testing.T) { 262 dir := t.TempDir() 263 264 opts := Options{ 265 IndexDir: dir, 266 ShardMax: 1024, 267 SizeMax: 1 << 20, 268 Parallelism: 1, 269 } 270 opts.RepositoryDescription.Name = "repo" 271 opts.SetDefaults() 272 273 b, err := NewBuilder(opts) 274 if err != nil { 275 t.Fatalf("NewBuilder: %v", err) 276 } 277 278 for i := range 4 { 279 nm := fmt.Sprintf("F%d", i) 280 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 281 } 282 b.buildError = fmt.Errorf("any error") 283 284 // No error checking. 285 _ = b.Finish() 286 287 // Finish cleans up temporary files. 288 if fs, err := filepath.Glob(dir + "/*"); err != nil { 289 t.Errorf("glob(%s): %v", dir, err) 290 } else if len(fs) != 0 { 291 t.Errorf("got shards %v, want []", fs) 292 } 293} 294 295// Tests that we skip looping over repos in compound shards when we know that 296// the repository we are looking for is not in the shard. 297func TestSkipCompoundShards(t *testing.T) { 298 metricCompoundShardLookups.Reset() 299 300 compoundShards := [][]zoekt.Repository{ 301 { 302 {Name: "repoA", ID: 1}, 303 {Name: "repoB", ID: 2}, 304 {Name: "repoC", ID: 3}, 305 }, 306 { 307 {Name: "repoD", ID: 4}, 308 {Name: "repoE", ID: 5}, 309 {Name: "repoF", ID: 6}, 310 {Name: "repoF", ID: 7}, 311 {Name: "repoF", ID: 8}, 312 }, 313 } 314 var lookForRepoID uint32 = 99 315 wantSkippedCount := 2 316 317 indexDir := t.TempDir() 318 for _, repositoryGroup := range compoundShards { 319 createTestCompoundShard(t, indexDir, repositoryGroup) 320 } 321 o := &Options{ 322 IndexDir: indexDir, 323 RepositoryDescription: zoekt.Repository{ID: lookForRepoID}, 324 } 325 326 shard := o.findCompoundShard() 327 require.Empty(t, shard) 328 329 // Check if the "skipped" counter was incremented 330 skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped"))) 331 require.Equal(t, wantSkippedCount, skippedCount) 332} 333 334// With optimization 335// BenchmarkFindCompoundShard-16 33505 36016 ns/op 336// 337// Without optimization 338// BenchmarkFindCompoundShard-16 76 15568589 ns/op 339func BenchmarkFindCompoundShard(b *testing.B) { 340 // Generate a large compound shard 341 const numRepos = 5000 342 repositories := make([]zoekt.Repository, numRepos) 343 for i := range numRepos { 344 repositories[i] = zoekt.Repository{ 345 Name: fmt.Sprintf("repo%d", i+1), 346 ID: uint32(i + 1), 347 } 348 } 349 indexDir := b.TempDir() 350 createTestCompoundShard(b, indexDir, repositories) 351 352 // pick id that is not in the shard 353 var searchRepoID uint32 = numRepos + 1 354 355 b.ResetTimer() 356 for i := 0; i < b.N; i++ { 357 o := &Options{ 358 IndexDir: indexDir, 359 RepositoryDescription: zoekt.Repository{ID: searchRepoID}, 360 } 361 362 shard := o.findCompoundShard() 363 if shard != "" { 364 b.Fatal("expected empty result") 365 } 366 } 367} 368 369func TestOptions_FindAllShards(t *testing.T) { 370 type simpleShard struct { 371 Repository zoekt.Repository 372 // NumShards is the number of shards that should be created that 373 // contain data for "Repository". 374 NumShards int 375 } 376 377 tests := []struct { 378 name string 379 simpleShards []simpleShard 380 compoundShards [][]zoekt.Repository 381 expectedShardCount int 382 expectedRepository zoekt.Repository 383 }{ 384 { 385 name: "repository in normal shard", 386 simpleShards: []simpleShard{ 387 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 388 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 389 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 390 }, 391 expectedShardCount: 1, 392 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 393 }, 394 { 395 name: "repository in compound shard", 396 compoundShards: [][]zoekt.Repository{ 397 { 398 {Name: "repoA", ID: 1}, 399 {Name: "repoB", ID: 2}, 400 {Name: "repoC", ID: 3}, 401 }, 402 { 403 {Name: "repoD", ID: 4}, 404 {Name: "repoE", ID: 5}, 405 {Name: "repoF", ID: 6}, 406 }, 407 }, 408 expectedShardCount: 1, 409 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 410 }, 411 { 412 name: "repository split across multiple shards", 413 simpleShards: []simpleShard{ 414 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 415 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2}, 416 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 417 }, 418 expectedShardCount: 2, 419 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 420 }, 421 { 422 name: "unknown repository", 423 simpleShards: []simpleShard{ 424 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 425 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 426 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 427 }, 428 compoundShards: [][]zoekt.Repository{ 429 { 430 {Name: "repoD", ID: 4}, 431 {Name: "repoE", ID: 5}, 432 {Name: "repoF", ID: 6}, 433 }, 434 }, 435 expectedShardCount: 0, 436 }, 437 { 438 name: "match on ID, not name (compound only)", 439 compoundShards: [][]zoekt.Repository{ 440 { 441 {Name: "repoA", ID: 1}, 442 {Name: "repoB", ID: 2}, 443 {Name: "repoC", ID: 3}, 444 }, 445 { 446 {Name: "repoD", ID: 4}, 447 {Name: "repoE", ID: 5}, 448 {Name: "repoF", ID: 6}, 449 }, 450 }, 451 expectedShardCount: 1, 452 expectedRepository: zoekt.Repository{Name: "something-else", ID: 5}, 453 }, 454 } 455 for _, tt := range tests { 456 t.Run(tt.name, func(t *testing.T) { 457 t.Parallel() 458 459 // prepare 460 indexDir := t.TempDir() 461 462 for _, s := range tt.simpleShards { 463 createTestShard(t, indexDir, s.Repository, s.NumShards) 464 } 465 466 for _, repositoryGroup := range tt.compoundShards { 467 createTestCompoundShard(t, indexDir, repositoryGroup) 468 } 469 470 o := &Options{ 471 IndexDir: indexDir, 472 RepositoryDescription: tt.expectedRepository, 473 } 474 o.SetDefaults() 475 476 // run test 477 shards := o.FindAllShards() 478 479 // verify results 480 if len(shards) != tt.expectedShardCount { 481 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards)) 482 } 483 484 if tt.expectedShardCount > 0 { 485 for _, s := range shards { 486 // all shards should contain the metadata for the desired repository 487 repos, _, err := ReadMetadataPathAlive(s) 488 if err != nil { 489 t.Fatalf("reading metadata from shard %q: %s", s, err) 490 } 491 492 foundRepository := false 493 for _, r := range repos { 494 if r.ID == tt.expectedRepository.ID { 495 foundRepository = true 496 break 497 } 498 } 499 500 if !foundRepository { 501 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID) 502 } 503 } 504 } 505 }) 506 } 507} 508 509func TestBuilder_BranchNamesEqual(t *testing.T) { 510 for i, test := range []struct { 511 oldBranches []zoekt.RepositoryBranch 512 newBranches []zoekt.RepositoryBranch 513 expected bool 514 }{ 515 { 516 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 517 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 518 expected: true, 519 }, 520 { 521 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}}, 522 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}}, 523 expected: true, 524 }, 525 { 526 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 527 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}}, 528 expected: false, 529 }, 530 { 531 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 532 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}}, 533 expected: false, 534 }, 535 { 536 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 537 newBranches: []zoekt.RepositoryBranch{}, 538 expected: false, 539 }, 540 { 541 oldBranches: []zoekt.RepositoryBranch{}, 542 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 543 expected: false, 544 }, 545 } { 546 t.Run(strconv.Itoa(i), func(t *testing.T) { 547 actual := BranchNamesEqual(test.oldBranches, test.newBranches) 548 if test.expected != actual { 549 t.Errorf("expected: %t, got: %t", test.expected, actual) 550 } 551 }) 552 } 553} 554 555func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) { 556 indexDir := t.TempDir() 557 558 repository := zoekt.Repository{ 559 Name: "repo", 560 ID: 1, 561 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}}, 562 } 563 createTestShard(t, indexDir, repository, 2) 564 565 repositoryNewBranches := zoekt.Repository{ 566 Name: "repo", 567 ID: 1, 568 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}}, 569 } 570 571 o := Options{ 572 IndexDir: indexDir, 573 RepositoryDescription: repositoryNewBranches, 574 IsDelta: true, 575 } 576 o.SetDefaults() 577 578 b, err := NewBuilder(o) 579 if err != nil { 580 t.Fatalf("NewBuilder: %v", err) 581 } 582 583 err = b.Finish() 584 if !errors.As(err, &deltaBranchSetError{}) { 585 t.Fatalf("expected error complaning about different branch names, got: %s", err) 586 } 587} 588 589func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) { 590 repository := zoekt.Repository{ 591 Name: "repo", 592 ID: 1, 593 Branches: []zoekt.RepositoryBranch{{Name: "foo"}}, 594 } 595 596 for _, test := range []struct { 597 name string 598 options func(options *Options) 599 }{ 600 { 601 name: "update option CTagsPath to non default", 602 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" }, 603 }, 604 { 605 name: "update option DisableCTags to non default", 606 options: func(options *Options) { options.DisableCTags = true }, 607 }, 608 { 609 name: "update option SizeMax to non default", 610 options: func(options *Options) { options.SizeMax -= 10 }, 611 }, 612 { 613 name: "update option LargeFiles to non default", 614 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} }, 615 }, 616 } { 617 test := test 618 619 t.Run(test.name, func(t *testing.T) { 620 indexDir := t.TempDir() 621 622 // initially use default options 623 createTestShard(t, indexDir, repository, 2) 624 625 o := Options{ 626 IndexDir: indexDir, 627 RepositoryDescription: repository, 628 IsDelta: true, 629 } 630 test.options(&o) 631 632 b, err := NewBuilder(o) 633 if err != nil { 634 t.Fatalf("NewBuilder: %v", err) 635 } 636 637 err = b.Finish() 638 if err == nil { 639 t.Fatalf("no error regarding index options mismatch") 640 } 641 642 var optionsMismatchError *deltaIndexOptionsMismatchError 643 if !errors.As(err, &optionsMismatchError) { 644 t.Fatalf("expected error complaining about index options mismatch, got: %s", err) 645 } 646 }) 647 } 648} 649 650func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { 651 olderTime := time.Unix(0, 0) 652 newerTime := time.Unix(10000, 0) 653 654 for _, test := range []struct { 655 name string 656 originalRepository zoekt.Repository 657 updatedRepository zoekt.Repository 658 }{ 659 { 660 name: "update commit information", 661 originalRepository: zoekt.Repository{ 662 Name: "repo", 663 ID: 1, 664 Branches: []zoekt.RepositoryBranch{ 665 {Name: "main", Version: "v1"}, 666 {Name: "release", Version: "v1"}, 667 }, 668 }, 669 updatedRepository: zoekt.Repository{ 670 Name: "repo", 671 ID: 1, 672 Branches: []zoekt.RepositoryBranch{ 673 {Name: "main", Version: "v2"}, 674 {Name: "release", Version: "v2"}, 675 }, 676 }, 677 }, 678 { 679 name: "update latest commit date (older -> newer)", 680 originalRepository: zoekt.Repository{ 681 Name: "repo", 682 ID: 1, 683 Branches: []zoekt.RepositoryBranch{ 684 {Name: "main", Version: "v1"}, 685 }, 686 LatestCommitDate: olderTime, 687 }, 688 updatedRepository: zoekt.Repository{ 689 Name: "repo", 690 ID: 1, 691 Branches: []zoekt.RepositoryBranch{ 692 {Name: "main", Version: "v2"}, 693 }, 694 LatestCommitDate: newerTime, 695 }, 696 }, 697 { 698 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)", 699 originalRepository: zoekt.Repository{ 700 Name: "repo", 701 ID: 1, 702 Branches: []zoekt.RepositoryBranch{ 703 {Name: "main", Version: "v1"}, 704 }, 705 LatestCommitDate: newerTime, 706 }, 707 updatedRepository: zoekt.Repository{ 708 Name: "repo", 709 ID: 1, 710 Branches: []zoekt.RepositoryBranch{ 711 {Name: "main", Version: "v2"}, 712 }, 713 LatestCommitDate: olderTime, 714 }, 715 }, 716 } { 717 test := test 718 719 t.Run(test.name, func(t *testing.T) { 720 indexDir := t.TempDir() 721 722 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) { 723 o.DisableCTags = true 724 }) 725 726 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) { 727 o.IsDelta = true 728 o.DisableCTags = true 729 }) 730 731 if len(shards) < 3 { 732 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", ")) 733 } 734 735 for _, s := range shards { 736 repositories, _, err := ReadMetadataPathAlive(s) 737 if err != nil { 738 t.Fatalf("reading repository metadata from shard %q", s) 739 } 740 741 var foundRepository *zoekt.Repository 742 for _, r := range repositories { 743 if r.ID == test.updatedRepository.ID { 744 foundRepository = r 745 break 746 } 747 } 748 749 if foundRepository == nil { 750 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s) 751 } 752 753 diffOptions := []cmp.Option{ 754 cmpopts.IgnoreUnexported(zoekt.Repository{}), 755 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 756 cmpopts.EquateEmpty(), 757 } 758 759 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" { 760 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff) 761 } 762 } 763 }) 764 } 765} 766 767func TestFindRepositoryMetadata(t *testing.T) { 768 tests := []struct { 769 name string 770 normalShardRepositories []zoekt.Repository 771 compoundShardRepositories []zoekt.Repository 772 input *zoekt.Repository 773 expectedRepository *zoekt.Repository 774 expectedOk bool 775 }{ 776 { 777 name: "repository in normal shards", 778 normalShardRepositories: []zoekt.Repository{ 779 {Name: "repoA", ID: 1}, 780 {Name: "repoB", ID: 2}, 781 {Name: "repoC", ID: 3}, 782 }, 783 compoundShardRepositories: []zoekt.Repository{ 784 {Name: "repoD", ID: 4}, 785 {Name: "repoE", ID: 5}, 786 {Name: "repoF", ID: 6}, 787 }, 788 input: &zoekt.Repository{Name: "repoB", ID: 2}, 789 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2}, 790 expectedOk: true, 791 }, 792 { 793 name: "repository in compound shards", 794 normalShardRepositories: []zoekt.Repository{ 795 {Name: "repoA", ID: 1}, 796 {Name: "repoB", ID: 2}, 797 {Name: "repoC", ID: 3}, 798 }, 799 compoundShardRepositories: []zoekt.Repository{ 800 {Name: "repoD", ID: 4}, 801 {Name: "repoE", ID: 5}, 802 {Name: "repoF", ID: 6}, 803 }, 804 input: &zoekt.Repository{Name: "repoE", ID: 5}, 805 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5}, 806 expectedOk: true, 807 }, 808 { 809 name: "repository not in any shard", 810 normalShardRepositories: []zoekt.Repository{ 811 {Name: "repoA", ID: 1}, 812 {Name: "repoB", ID: 2}, 813 {Name: "repoC", ID: 3}, 814 }, 815 compoundShardRepositories: []zoekt.Repository{ 816 {Name: "repoD", ID: 4}, 817 {Name: "repoE", ID: 5}, 818 {Name: "repoF", ID: 6}, 819 }, 820 input: &zoekt.Repository{Name: "notPresent", ID: 123}, 821 expectedRepository: nil, 822 expectedOk: false, 823 }, 824 } 825 for _, tt := range tests { 826 t.Run(tt.name, func(t *testing.T) { 827 // setup 828 indexDir := t.TempDir() 829 830 optFns := []func(o *Options){ 831 // ctags aren't important for this test, and the equality checks 832 // for diffing repositories can break due to local configuration 833 func(o *Options) { 834 o.DisableCTags = true 835 }, 836 } 837 838 for _, r := range tt.normalShardRepositories { 839 createTestShard(t, indexDir, r, 1, optFns...) 840 } 841 842 if len(tt.compoundShardRepositories) > 0 { 843 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...) 844 } 845 846 o := &Options{ 847 IndexDir: indexDir, 848 RepositoryDescription: *tt.input, 849 } 850 o.SetDefaults() 851 852 // run test 853 got, _, gotOk, err := o.FindRepositoryMetadata() 854 if err != nil { 855 t.Errorf("received unexpected error: %v", err) 856 return 857 } 858 859 // check outcome 860 compareOptions := []cmp.Option{ 861 cmpopts.IgnoreUnexported(zoekt.Repository{}), 862 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 863 cmpopts.EquateEmpty(), 864 } 865 866 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" { 867 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff) 868 } 869 870 if tt.expectedOk != gotOk { 871 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk) 872 } 873 }) 874 } 875} 876 877func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { 878 t.Helper() 879 880 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil { 881 t.Fatal(err) 882 } 883 884 o := Options{ 885 IndexDir: indexDir, 886 RepositoryDescription: r, 887 ShardMax: 75, // create a new shard every 75 bytes 888 } 889 o.SetDefaults() 890 891 for _, fn := range optFns { 892 fn(&o) 893 } 894 895 b, err := NewBuilder(o) 896 if err != nil { 897 t.Fatalf("NewBuilder: %v", err) 898 } 899 900 if numShards == 0 { 901 // We have to make at least 1 shard. 902 numShards = 1 903 } 904 905 for i := range numShards { 906 // Create entries (file + contents) that are ~100 bytes each. 907 // This (along with our shardMax setting of 75 bytes) means that each shard 908 // will contain at most one of these. 909 fileName := strconv.Itoa(i) 910 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} 911 for _, branch := range o.RepositoryDescription.Branches { 912 document.Branches = append(document.Branches, branch.Name) 913 } 914 915 err := b.Add(document) 916 if err != nil { 917 t.Fatalf("failed to add file %q to builder: %s", fileName, err) 918 } 919 } 920 921 if err := b.Finish(); err != nil { 922 t.Fatalf("Finish: %v", err) 923 } 924 925 return o.FindAllShards() 926} 927 928func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) { 929 t.Helper() 930 931 var shardNames []string 932 933 for _, r := range repositories { 934 // create an isolated scratch space to store normal shards for this repository 935 scratchDir := t.TempDir() 936 937 // create shards that'll be merged later 938 createTestShard(t, scratchDir, r, 1, optFns...) 939 940 // discover file names for all the normal shards we created 941 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse 942 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt")) 943 if err != nil { 944 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err) 945 } 946 947 shardNames = append(shardNames, shards...) 948 } 949 950 // load the normal shards that we created 951 var files []IndexFile 952 for _, shard := range shardNames { 953 f, err := os.Open(shard) 954 if err != nil { 955 t.Fatalf("opening shard file: %s", err) 956 } 957 defer f.Close() 958 959 indexFile, err := NewIndexFile(f) 960 if err != nil { 961 t.Fatalf("creating index file: %s", err) 962 } 963 defer indexFile.Close() 964 965 files = append(files, indexFile) 966 } 967 968 // merge all the simple shards into a compound shard 969 tmpName, dstName, err := Merge(indexDir, files...) 970 if err != nil { 971 t.Fatalf("merging index files into compound shard: %s", err) 972 } 973 if err := os.Rename(tmpName, dstName); err != nil { 974 t.Fatal(err) 975 } 976} 977 978func TestIgnoreSizeMax(t *testing.T) { 979 for _, test := range []struct { 980 name string 981 largeFiles []string 982 filePaths []string 983 expected bool 984 }{ 985 { 986 name: "empty pattern does nothing", 987 largeFiles: []string{""}, 988 filePaths: []string{"F0"}, 989 expected: false, 990 }, 991 { 992 name: "positive match allows", 993 largeFiles: []string{"F0"}, 994 filePaths: []string{"F0"}, 995 expected: true, 996 }, 997 { 998 name: "positive and negative patterns allows", 999 largeFiles: []string{"F?", "!F0"}, 1000 filePaths: []string{"F1"}, 1001 expected: true, 1002 }, 1003 { 1004 name: "positive and negative patterns disallows", 1005 largeFiles: []string{"F?", "!F0"}, 1006 filePaths: []string{"F0"}, 1007 expected: false, 1008 }, 1009 { 1010 name: "positive escaped pattern allows", 1011 largeFiles: []string{"\\!F?"}, 1012 filePaths: []string{"!F0", "!F1"}, 1013 expected: true, 1014 }, 1015 { 1016 name: "postive escaped pattern does not disallow", 1017 largeFiles: []string{"F0", "\\!F?"}, 1018 filePaths: []string{"F0", "!F0"}, 1019 expected: true, 1020 }, 1021 { 1022 name: "combined meta and literal interpretation disallows", 1023 largeFiles: []string{"*F*", "!!F*"}, 1024 filePaths: []string{"!F0"}, 1025 expected: false, 1026 }, 1027 { 1028 name: "combined meta and literal interpretation allows", 1029 largeFiles: []string{"*F*", "!!F*"}, 1030 filePaths: []string{"F0"}, 1031 expected: true, 1032 }, 1033 { 1034 name: "largeFiles order: positive match overrides previous negative match and allows", 1035 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1036 filePaths: []string{"F0"}, 1037 expected: true, 1038 }, 1039 { 1040 name: "largeFiles order: positive match overrides previous negative match and disallows", 1041 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1042 filePaths: []string{"F1"}, 1043 expected: false, 1044 }, 1045 { 1046 name: "largeFiles order: negative match overrides previous positive match and allows", 1047 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1048 filePaths: []string{"F1"}, 1049 expected: true, 1050 }, 1051 { 1052 name: "largeFiles order: negative match overrides previous positive match and disallows", 1053 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1054 filePaths: []string{"F0"}, 1055 expected: false, 1056 }, 1057 } { 1058 t.Run(test.name, func(t *testing.T) { 1059 o := Options{ 1060 LargeFiles: test.largeFiles, 1061 } 1062 1063 for _, filePath := range test.filePaths { 1064 ignore := o.IgnoreSizeMax(filePath) 1065 if ignore != test.expected { 1066 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore) 1067 } 1068 } 1069 }) 1070 } 1071} 1072 1073type filerankCase struct { 1074 name string 1075 docs []*Document 1076 want []int 1077} 1078 1079func testFileRankAspect(t *testing.T, c filerankCase) { 1080 var want []*Document 1081 for _, j := range c.want { 1082 want = append(want, c.docs[j]) 1083 } 1084 1085 got := make([]*Document, len(c.docs)) 1086 copy(got, c.docs) 1087 for _, d := range got { 1088 DetermineFileCategory(d) 1089 } 1090 sortDocuments(got) 1091 1092 print := func(ds []*Document) string { 1093 r := "" 1094 for _, d := range ds { 1095 r += fmt.Sprintf("%v, ", d) 1096 } 1097 return r 1098 } 1099 if !reflect.DeepEqual(got, want) { 1100 t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 1101 } 1102} 1103 1104func TestFileRank(t *testing.T) { 1105 for _, c := range []filerankCase{{ 1106 name: "filename", 1107 docs: []*Document{ 1108 { 1109 Name: "longlonglong", 1110 Content: []byte("bla"), 1111 }, 1112 { 1113 Name: "short", 1114 Content: []byte("bla"), 1115 }, 1116 }, 1117 want: []int{1, 0}, 1118 }, { 1119 name: "test", 1120 docs: []*Document{ 1121 { 1122 Name: "foo_test.go", 1123 Content: []byte("bla"), 1124 }, 1125 { 1126 Name: "longlonglong", 1127 Content: []byte("bla"), 1128 }, 1129 }, 1130 want: []int{1, 0}, 1131 }, { 1132 name: "content", 1133 docs: []*Document{ 1134 { 1135 Content: []byte("bla"), 1136 }, 1137 { 1138 Content: []byte("blablablabla"), 1139 }, 1140 { 1141 Content: []byte("blabla"), 1142 }, 1143 }, 1144 want: []int{0, 2, 1}, 1145 }, { 1146 name: "skipped docs", 1147 docs: []*Document{ 1148 { 1149 Name: "binary_file", 1150 SkipReason: SkipReasonBinary, 1151 }, 1152 { 1153 Name: "some_test.go", 1154 Content: []byte("bla"), 1155 }, 1156 { 1157 Name: "large_file.go", 1158 SkipReason: SkipReasonTooLarge, 1159 }, 1160 { 1161 Name: "file.go", 1162 Content: []byte("blabla"), 1163 }, 1164 }, 1165 want: []int{3, 1, 0, 2}, 1166 }} { 1167 t.Run(c.name, func(t *testing.T) { 1168 testFileRankAspect(t, c) 1169 }) 1170 } 1171} 1172 1173func TestOptions_shardName(t *testing.T) { 1174 opts := Options{ 1175 IndexDir: "/data", 1176 RepositoryDescription: zoekt.Repository{ 1177 Name: "a/b", 1178 TenantID: 123, 1179 ID: 456, 1180 }, 1181 } 1182 1183 t.Setenv("WORKSPACES_API_URL", "") 1184 if got, want := opts.shardNameVersion(16, 0), "/data/a%2Fb_v16.00000.zoekt"; got != want { 1185 t.Fatalf("expected shard name to be repo name based:\ngot: %q\nwant: %q", got, want) 1186 } 1187 1188 t.Setenv("WORKSPACES_API_URL", "http://example.com") 1189 if got, want := opts.shardNameVersion(16, 0), "/data/000000123_000000456_v16.00000.zoekt"; got != want { 1190 t.Fatalf("expected shard name to be ID based:\ngot: %q\nwant: %q", got, want) 1191 } 1192 1193 // If something goes wrong and TenantID and RepoID is unset, we create a 1194 // name which won't be visible by any tenant. 1195 opts = Options{ 1196 IndexDir: "/data", 1197 RepositoryDescription: zoekt.Repository{ 1198 Name: "a/b", 1199 }, 1200 } 1201 if got, want := opts.shardNameVersion(16, 0), "/data/000000000_000000000_v16.00000.zoekt"; got != want { 1202 t.Fatalf("expected shard name to be with no tenant:\ngot: %q\nwant: %q", got, want) 1203 } 1204}