fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package index 2 3import ( 4 "errors" 5 "flag" 6 "fmt" 7 "io" 8 "log" 9 "os" 10 "path/filepath" 11 "reflect" 12 "strconv" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/google/go-cmp/cmp" 18 "github.com/google/go-cmp/cmp/cmpopts" 19 "github.com/prometheus/client_golang/prometheus/testutil" 20 "github.com/stretchr/testify/require" 21 22 "github.com/sourcegraph/zoekt" 23) 24 25var update = flag.Bool("update", false, "update golden file") 26 27// ensure we don't regress on how we build v16 28func TestBuildv16(t *testing.T) { 29 dir := t.TempDir() 30 31 opts := Options{ 32 IndexDir: dir, 33 RepositoryDescription: zoekt.Repository{ 34 Name: "repo", 35 Source: "./testdata/repo/", 36 }, 37 DisableCTags: true, 38 } 39 opts.SetDefaults() 40 41 b, err := NewBuilder(opts) 42 if err != nil { 43 t.Fatal(err) 44 } 45 46 for _, p := range []string{"main.go"} { 47 blob, err := os.ReadFile(filepath.Join("../testdata/repo", p)) 48 if err != nil { 49 t.Fatal(err) 50 } 51 if err := b.AddFile(p, blob); err != nil { 52 t.Fatal(err) 53 } 54 } 55 56 wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt") 57 58 // fields indexTime and id depend on time. For this test, we copy the fields from 59 // the old shard. 60 _, wantMetadata, err := ReadMetadataPath(wantP) 61 if err != nil { 62 t.Fatal(err) 63 } 64 b.indexTime = wantMetadata.IndexTime 65 b.id = wantMetadata.ID 66 67 if err := b.Finish(); err != nil { 68 t.Fatal(err) 69 } 70 71 gotP := filepath.Join(dir, "repo_v16.00000.zoekt") 72 73 if *update { 74 data, err := os.ReadFile(gotP) 75 if err != nil { 76 t.Fatal(err) 77 } 78 err = os.WriteFile(wantP, data, 0o644) 79 if err != nil { 80 t.Fatal(err) 81 } 82 return 83 } 84 85 got, err := os.ReadFile(gotP) 86 if err != nil { 87 t.Fatal(err) 88 } 89 want, err := os.ReadFile(wantP) 90 if err != nil { 91 t.Fatal(err) 92 } 93 94 if d := cmp.Diff(want, got); d != "" { 95 t.Errorf("mismatch (-want +got):\n%s", d) 96 } 97} 98 99func TestFlags(t *testing.T) { 100 cases := []struct { 101 args []string 102 want Options 103 }{{ 104 // Defaults 105 args: []string{}, 106 want: Options{}, 107 }, { 108 args: []string{"-index", "/tmp"}, 109 want: Options{ 110 IndexDir: "/tmp", 111 }, 112 }, { 113 // single large file pattern 114 args: []string{"-large_file", "*.md"}, 115 want: Options{ 116 LargeFiles: []string{"*.md"}, 117 }, 118 }, { 119 // multiple large file pattern 120 args: []string{"-large_file", "*.md", "-large_file", "*.yaml"}, 121 want: Options{ 122 LargeFiles: []string{"*.md", "*.yaml"}, 123 }, 124 }, { 125 // multiple large file pattern with negated pattern 126 args: []string{"-large_file", "*.md", "-large_file", "!*.yaml"}, 127 want: Options{ 128 LargeFiles: []string{"*.md", "!*.yaml"}, 129 }, 130 }, { 131 // multiple large file pattern with escaped character 132 args: []string{"-large_file", "*.md", "-large_file", "\\!*.yaml"}, 133 want: Options{ 134 LargeFiles: []string{"*.md", "\\!*.yaml"}, 135 }, 136 }} 137 138 ignored := []cmp.Option{ 139 // depends on $PATH setting. 140 cmpopts.IgnoreFields(Options{}, "CTagsPath"), 141 cmpopts.IgnoreFields(Options{}, "ScipCTagsPath"), 142 cmpopts.IgnoreFields(Options{}, "changedOrRemovedFiles"), 143 cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 144 } 145 146 for _, c := range cases { 147 c.want.SetDefaults() 148 // depends on $PATH setting. 149 c.want.CTagsPath = "" 150 151 got := Options{} 152 fs := flag.NewFlagSet("", flag.ContinueOnError) 153 got.Flags(fs) 154 if err := fs.Parse(c.args); err != nil { 155 t.Errorf("failed to parse args %v: %v", c.args, err) 156 } else if d := cmp.Diff(c.want, got, ignored...); d != "" { 157 t.Errorf("mismatch for %v (-want +got):\n%s", c.args, d) 158 } 159 } 160} 161 162func TestIncrementalSkipIndexing(t *testing.T) { 163 cases := []struct { 164 name string 165 want bool 166 opts Options 167 }{{ 168 name: "v17-noop", 169 want: true, 170 opts: Options{ 171 RepositoryDescription: zoekt.Repository{ 172 Name: "repo17", 173 }, 174 SizeMax: 2097152, 175 DisableCTags: true, 176 }, 177 }, { 178 name: "v16-noop", 179 want: true, 180 opts: Options{ 181 RepositoryDescription: zoekt.Repository{ 182 Name: "repo", 183 }, 184 SizeMax: 2097152, 185 DisableCTags: true, 186 }, 187 }, { 188 name: "v17-id", 189 want: false, 190 opts: Options{ 191 RepositoryDescription: zoekt.Repository{ 192 Name: "repo17", 193 RawConfig: map[string]string{ 194 "repoid": "123", 195 }, 196 }, 197 SizeMax: 2097152, 198 DisableCTags: true, 199 }, 200 }, { 201 name: "doesnotexist", 202 want: false, 203 opts: Options{ 204 RepositoryDescription: zoekt.Repository{ 205 Name: "doesnotexist", 206 }, 207 SizeMax: 2097152, 208 DisableCTags: true, 209 }, 210 }} 211 212 for _, tc := range cases { 213 t.Run(tc.name, func(t *testing.T) { 214 tc.opts.IndexDir = "../testdata/shards" 215 t.Log(tc.opts.IndexState()) 216 got := tc.opts.IncrementalSkipIndexing() 217 if got != tc.want { 218 t.Fatalf("want %v got %v", tc.want, got) 219 } 220 }) 221 } 222} 223 224func TestMain(m *testing.M) { 225 flag.Parse() 226 if !testing.Verbose() { 227 log.SetOutput(io.Discard) 228 } 229 os.Exit(m.Run()) 230} 231 232func TestDontCountContentOfSkippedFiles(t *testing.T) { 233 b, err := NewBuilder(Options{RepositoryDescription: zoekt.Repository{ 234 Name: "foo", 235 }}) 236 if err != nil { 237 t.Fatal(err) 238 } 239 240 // content with at least 100 bytes 241 binary := append([]byte("abc def \x00"), make([]byte, 100)...) 242 err = b.Add(Document{ 243 Name: "f1", 244 Content: binary, 245 }) 246 if err != nil { 247 t.Fatal(err) 248 } 249 if len(b.todo) != 1 || b.todo[0].SkipReason == "" { 250 t.Fatalf("document should have been skipped") 251 } 252 if b.todo[0].Content != nil { 253 t.Fatalf("document content should be empty") 254 } 255 if b.size >= 100 { 256 t.Fatalf("content of skipped documents should not count towards shard size thresold") 257 } 258} 259 260func TestPartialSuccess(t *testing.T) { 261 dir := t.TempDir() 262 263 opts := Options{ 264 IndexDir: dir, 265 ShardMax: 1024, 266 SizeMax: 1 << 20, 267 Parallelism: 1, 268 } 269 opts.RepositoryDescription.Name = "repo" 270 opts.SetDefaults() 271 272 b, err := NewBuilder(opts) 273 if err != nil { 274 t.Fatalf("NewBuilder: %v", err) 275 } 276 277 for i := 0; i < 4; i++ { 278 nm := fmt.Sprintf("F%d", i) 279 _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 280 } 281 b.buildError = fmt.Errorf("any error") 282 283 // No error checking. 284 _ = b.Finish() 285 286 // Finish cleans up temporary files. 287 if fs, err := filepath.Glob(dir + "/*"); err != nil { 288 t.Errorf("glob(%s): %v", dir, err) 289 } else if len(fs) != 0 { 290 t.Errorf("got shards %v, want []", fs) 291 } 292} 293 294// Tests that we skip looping over repos in compound shards when we know that 295// the repository we are looking for is not in the shard. 296func TestSkipCompoundShards(t *testing.T) { 297 metricCompoundShardLookups.Reset() 298 299 compoundShards := [][]zoekt.Repository{ 300 { 301 {Name: "repoA", ID: 1}, 302 {Name: "repoB", ID: 2}, 303 {Name: "repoC", ID: 3}, 304 }, 305 { 306 {Name: "repoD", ID: 4}, 307 {Name: "repoE", ID: 5}, 308 {Name: "repoF", ID: 6}, 309 {Name: "repoF", ID: 7}, 310 {Name: "repoF", ID: 8}, 311 }, 312 } 313 var lookForRepoID uint32 = 99 314 wantSkippedCount := 2 315 316 indexDir := t.TempDir() 317 for _, repositoryGroup := range compoundShards { 318 createTestCompoundShard(t, indexDir, repositoryGroup) 319 } 320 o := &Options{ 321 IndexDir: indexDir, 322 RepositoryDescription: zoekt.Repository{ID: lookForRepoID}, 323 } 324 325 shard := o.findCompoundShard() 326 require.Empty(t, shard) 327 328 // Check if the "skipped" counter was incremented 329 skippedCount := int(testutil.ToFloat64(metricCompoundShardLookups.WithLabelValues("skipped"))) 330 require.Equal(t, wantSkippedCount, skippedCount) 331} 332 333// With optimization 334// BenchmarkFindCompoundShard-16 33505 36016 ns/op 335// 336// Without optimization 337// BenchmarkFindCompoundShard-16 76 15568589 ns/op 338func BenchmarkFindCompoundShard(b *testing.B) { 339 // Generate a large compound shard 340 const numRepos = 5000 341 repositories := make([]zoekt.Repository, numRepos) 342 for i := 0; i < numRepos; i++ { 343 repositories[i] = zoekt.Repository{ 344 Name: fmt.Sprintf("repo%d", i+1), 345 ID: uint32(i + 1), 346 } 347 } 348 indexDir := b.TempDir() 349 createTestCompoundShard(b, indexDir, repositories) 350 351 // pick id that is not in the shard 352 var searchRepoID uint32 = numRepos + 1 353 354 b.ResetTimer() 355 for i := 0; i < b.N; i++ { 356 o := &Options{ 357 IndexDir: indexDir, 358 RepositoryDescription: zoekt.Repository{ID: searchRepoID}, 359 } 360 361 shard := o.findCompoundShard() 362 if shard != "" { 363 b.Fatal("expected empty result") 364 } 365 } 366} 367 368func TestOptions_FindAllShards(t *testing.T) { 369 type simpleShard struct { 370 Repository zoekt.Repository 371 // NumShards is the number of shards that should be created that 372 // contain data for "Repository". 373 NumShards int 374 } 375 376 tests := []struct { 377 name string 378 simpleShards []simpleShard 379 compoundShards [][]zoekt.Repository 380 expectedShardCount int 381 expectedRepository zoekt.Repository 382 }{ 383 { 384 name: "repository in normal shard", 385 simpleShards: []simpleShard{ 386 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 387 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 388 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 389 }, 390 expectedShardCount: 1, 391 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 392 }, 393 { 394 name: "repository in compound shard", 395 compoundShards: [][]zoekt.Repository{ 396 { 397 {Name: "repoA", ID: 1}, 398 {Name: "repoB", ID: 2}, 399 {Name: "repoC", ID: 3}, 400 }, 401 { 402 {Name: "repoD", ID: 4}, 403 {Name: "repoE", ID: 5}, 404 {Name: "repoF", ID: 6}, 405 }, 406 }, 407 expectedShardCount: 1, 408 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 409 }, 410 { 411 name: "repository split across multiple shards", 412 simpleShards: []simpleShard{ 413 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 414 {Repository: zoekt.Repository{Name: "repoB", ID: 2}, NumShards: 2}, 415 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 416 }, 417 expectedShardCount: 2, 418 expectedRepository: zoekt.Repository{Name: "repoB", ID: 2}, 419 }, 420 { 421 name: "unknown repository", 422 simpleShards: []simpleShard{ 423 {Repository: zoekt.Repository{Name: "repoA", ID: 1}}, 424 {Repository: zoekt.Repository{Name: "repoB", ID: 2}}, 425 {Repository: zoekt.Repository{Name: "repoC", ID: 3}}, 426 }, 427 compoundShards: [][]zoekt.Repository{ 428 { 429 {Name: "repoD", ID: 4}, 430 {Name: "repoE", ID: 5}, 431 {Name: "repoF", ID: 6}, 432 }, 433 }, 434 expectedShardCount: 0, 435 }, 436 { 437 name: "match on ID, not name (compound only)", 438 compoundShards: [][]zoekt.Repository{ 439 { 440 {Name: "repoA", ID: 1}, 441 {Name: "repoB", ID: 2}, 442 {Name: "repoC", ID: 3}, 443 }, 444 { 445 {Name: "repoD", ID: 4}, 446 {Name: "repoE", ID: 5}, 447 {Name: "repoF", ID: 6}, 448 }, 449 }, 450 expectedShardCount: 1, 451 expectedRepository: zoekt.Repository{Name: "something-else", ID: 5}, 452 }, 453 } 454 for _, tt := range tests { 455 t.Run(tt.name, func(t *testing.T) { 456 t.Parallel() 457 458 // prepare 459 indexDir := t.TempDir() 460 461 for _, s := range tt.simpleShards { 462 createTestShard(t, indexDir, s.Repository, s.NumShards) 463 } 464 465 for _, repositoryGroup := range tt.compoundShards { 466 createTestCompoundShard(t, indexDir, repositoryGroup) 467 } 468 469 o := &Options{ 470 IndexDir: indexDir, 471 RepositoryDescription: tt.expectedRepository, 472 } 473 o.SetDefaults() 474 475 // run test 476 shards := o.FindAllShards() 477 478 // verify results 479 if len(shards) != tt.expectedShardCount { 480 t.Fatalf("expected %d shard(s), received %d shard(s)", tt.expectedShardCount, len(shards)) 481 } 482 483 if tt.expectedShardCount > 0 { 484 for _, s := range shards { 485 // all shards should contain the metadata for the desired repository 486 repos, _, err := ReadMetadataPathAlive(s) 487 if err != nil { 488 t.Fatalf("reading metadata from shard %q: %s", s, err) 489 } 490 491 foundRepository := false 492 for _, r := range repos { 493 if r.ID == tt.expectedRepository.ID { 494 foundRepository = true 495 break 496 } 497 } 498 499 if !foundRepository { 500 t.Errorf("shard %q doesn't contain metadata for repository %d", s, tt.expectedRepository.ID) 501 } 502 } 503 } 504 }) 505 } 506} 507 508func TestBuilder_BranchNamesEqual(t *testing.T) { 509 for i, test := range []struct { 510 oldBranches []zoekt.RepositoryBranch 511 newBranches []zoekt.RepositoryBranch 512 expected bool 513 }{ 514 { 515 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 516 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v1"}}, 517 expected: true, 518 }, 519 { 520 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}, {Name: "release", Version: "v3"}}, 521 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v4"}}, 522 expected: true, 523 }, 524 { 525 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 526 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v2"}, {Name: "release", Version: "v1"}}, 527 expected: false, 528 }, 529 { 530 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 531 newBranches: []zoekt.RepositoryBranch{{Name: "release", Version: "v1"}}, 532 expected: false, 533 }, 534 { 535 oldBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 536 newBranches: []zoekt.RepositoryBranch{}, 537 expected: false, 538 }, 539 { 540 oldBranches: []zoekt.RepositoryBranch{}, 541 newBranches: []zoekt.RepositoryBranch{{Name: "main", Version: "v1"}}, 542 expected: false, 543 }, 544 } { 545 t.Run(strconv.Itoa(i), func(t *testing.T) { 546 actual := BranchNamesEqual(test.oldBranches, test.newBranches) 547 if test.expected != actual { 548 t.Errorf("expected: %t, got: %t", test.expected, actual) 549 } 550 }) 551 } 552} 553 554func TestBuilder_DeltaShardsBuildsShouldErrorOnBranchSet(t *testing.T) { 555 indexDir := t.TempDir() 556 557 repository := zoekt.Repository{ 558 Name: "repo", 559 ID: 1, 560 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "bar"}}, 561 } 562 createTestShard(t, indexDir, repository, 2) 563 564 repositoryNewBranches := zoekt.Repository{ 565 Name: "repo", 566 ID: 1, 567 Branches: []zoekt.RepositoryBranch{{Name: "foo"}, {Name: "baz"}}, 568 } 569 570 o := Options{ 571 IndexDir: indexDir, 572 RepositoryDescription: repositoryNewBranches, 573 IsDelta: true, 574 } 575 o.SetDefaults() 576 577 b, err := NewBuilder(o) 578 if err != nil { 579 t.Fatalf("NewBuilder: %v", err) 580 } 581 582 err = b.Finish() 583 if !errors.As(err, &deltaBranchSetError{}) { 584 t.Fatalf("expected error complaning about different branch names, got: %s", err) 585 } 586} 587 588func TestBuilder_DeltaShardsBuildsShouldErrorOnIndexOptionsMismatch(t *testing.T) { 589 repository := zoekt.Repository{ 590 Name: "repo", 591 ID: 1, 592 Branches: []zoekt.RepositoryBranch{{Name: "foo"}}, 593 } 594 595 for _, test := range []struct { 596 name string 597 options func(options *Options) 598 }{ 599 { 600 name: "update option CTagsPath to non default", 601 options: func(options *Options) { options.CTagsPath = "ctags_updated_test/universal-ctags" }, 602 }, 603 { 604 name: "update option DisableCTags to non default", 605 options: func(options *Options) { options.DisableCTags = true }, 606 }, 607 { 608 name: "update option SizeMax to non default", 609 options: func(options *Options) { options.SizeMax -= 10 }, 610 }, 611 { 612 name: "update option LargeFiles to non default", 613 options: func(options *Options) { options.LargeFiles = []string{"-large_file", "*.md", "-large_file", "*.yaml"} }, 614 }, 615 } { 616 test := test 617 618 t.Run(test.name, func(t *testing.T) { 619 indexDir := t.TempDir() 620 621 // initially use default options 622 createTestShard(t, indexDir, repository, 2) 623 624 o := Options{ 625 IndexDir: indexDir, 626 RepositoryDescription: repository, 627 IsDelta: true, 628 } 629 test.options(&o) 630 631 b, err := NewBuilder(o) 632 if err != nil { 633 t.Fatalf("NewBuilder: %v", err) 634 } 635 636 err = b.Finish() 637 if err == nil { 638 t.Fatalf("no error regarding index options mismatch") 639 } 640 641 var optionsMismatchError *deltaIndexOptionsMismatchError 642 if !errors.As(err, &optionsMismatchError) { 643 t.Fatalf("expected error complaining about index options mismatch, got: %s", err) 644 } 645 }) 646 } 647} 648 649func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { 650 olderTime := time.Unix(0, 0) 651 newerTime := time.Unix(10000, 0) 652 653 for _, test := range []struct { 654 name string 655 originalRepository zoekt.Repository 656 updatedRepository zoekt.Repository 657 }{ 658 { 659 name: "update commit information", 660 originalRepository: zoekt.Repository{ 661 Name: "repo", 662 ID: 1, 663 Branches: []zoekt.RepositoryBranch{ 664 {Name: "main", Version: "v1"}, 665 {Name: "release", Version: "v1"}, 666 }, 667 }, 668 updatedRepository: zoekt.Repository{ 669 Name: "repo", 670 ID: 1, 671 Branches: []zoekt.RepositoryBranch{ 672 {Name: "main", Version: "v2"}, 673 {Name: "release", Version: "v2"}, 674 }, 675 }, 676 }, 677 { 678 name: "update latest commit date (older -> newer)", 679 originalRepository: zoekt.Repository{ 680 Name: "repo", 681 ID: 1, 682 Branches: []zoekt.RepositoryBranch{ 683 {Name: "main", Version: "v1"}, 684 }, 685 LatestCommitDate: olderTime, 686 }, 687 updatedRepository: zoekt.Repository{ 688 Name: "repo", 689 ID: 1, 690 Branches: []zoekt.RepositoryBranch{ 691 {Name: "main", Version: "v2"}, 692 }, 693 LatestCommitDate: newerTime, 694 }, 695 }, 696 { 697 name: "update latest commit date (even if latest commit date is older - the most recent commits are the source of truth)", 698 originalRepository: zoekt.Repository{ 699 Name: "repo", 700 ID: 1, 701 Branches: []zoekt.RepositoryBranch{ 702 {Name: "main", Version: "v1"}, 703 }, 704 LatestCommitDate: newerTime, 705 }, 706 updatedRepository: zoekt.Repository{ 707 Name: "repo", 708 ID: 1, 709 Branches: []zoekt.RepositoryBranch{ 710 {Name: "main", Version: "v2"}, 711 }, 712 LatestCommitDate: olderTime, 713 }, 714 }, 715 } { 716 test := test 717 718 t.Run(test.name, func(t *testing.T) { 719 indexDir := t.TempDir() 720 721 createTestShard(t, indexDir, test.originalRepository, 2, func(o *Options) { 722 o.DisableCTags = true 723 }) 724 725 shards := createTestShard(t, indexDir, test.updatedRepository, 1, func(o *Options) { 726 o.IsDelta = true 727 o.DisableCTags = true 728 }) 729 730 if len(shards) < 3 { 731 t.Fatalf("expected at least 3 shards, got %d (%s)", len(shards), strings.Join(shards, ", ")) 732 } 733 734 for _, s := range shards { 735 repositories, _, err := ReadMetadataPathAlive(s) 736 if err != nil { 737 t.Fatalf("reading repository metadata from shard %q", s) 738 } 739 740 var foundRepository *zoekt.Repository 741 for _, r := range repositories { 742 if r.ID == test.updatedRepository.ID { 743 foundRepository = r 744 break 745 } 746 } 747 748 if foundRepository == nil { 749 t.Fatalf("repository ID %d not in shard %q", test.updatedRepository.ID, s) 750 } 751 752 diffOptions := []cmp.Option{ 753 cmpopts.IgnoreUnexported(zoekt.Repository{}), 754 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 755 cmpopts.EquateEmpty(), 756 } 757 758 if diff := cmp.Diff(&test.updatedRepository, foundRepository, diffOptions...); diff != "" { 759 t.Errorf("shard %q: unexpected diff in repository metadata (-want +got):\n%s", s, diff) 760 } 761 } 762 }) 763 } 764} 765 766func TestFindRepositoryMetadata(t *testing.T) { 767 tests := []struct { 768 name string 769 normalShardRepositories []zoekt.Repository 770 compoundShardRepositories []zoekt.Repository 771 input *zoekt.Repository 772 expectedRepository *zoekt.Repository 773 expectedOk bool 774 }{ 775 { 776 name: "repository in normal shards", 777 normalShardRepositories: []zoekt.Repository{ 778 {Name: "repoA", ID: 1}, 779 {Name: "repoB", ID: 2}, 780 {Name: "repoC", ID: 3}, 781 }, 782 compoundShardRepositories: []zoekt.Repository{ 783 {Name: "repoD", ID: 4}, 784 {Name: "repoE", ID: 5}, 785 {Name: "repoF", ID: 6}, 786 }, 787 input: &zoekt.Repository{Name: "repoB", ID: 2}, 788 expectedRepository: &zoekt.Repository{Name: "repoB", ID: 2}, 789 expectedOk: true, 790 }, 791 { 792 name: "repository in compound shards", 793 normalShardRepositories: []zoekt.Repository{ 794 {Name: "repoA", ID: 1}, 795 {Name: "repoB", ID: 2}, 796 {Name: "repoC", ID: 3}, 797 }, 798 compoundShardRepositories: []zoekt.Repository{ 799 {Name: "repoD", ID: 4}, 800 {Name: "repoE", ID: 5}, 801 {Name: "repoF", ID: 6}, 802 }, 803 input: &zoekt.Repository{Name: "repoE", ID: 5}, 804 expectedRepository: &zoekt.Repository{Name: "repoE", ID: 5}, 805 expectedOk: true, 806 }, 807 { 808 name: "repository not in any shard", 809 normalShardRepositories: []zoekt.Repository{ 810 {Name: "repoA", ID: 1}, 811 {Name: "repoB", ID: 2}, 812 {Name: "repoC", ID: 3}, 813 }, 814 compoundShardRepositories: []zoekt.Repository{ 815 {Name: "repoD", ID: 4}, 816 {Name: "repoE", ID: 5}, 817 {Name: "repoF", ID: 6}, 818 }, 819 input: &zoekt.Repository{Name: "notPresent", ID: 123}, 820 expectedRepository: nil, 821 expectedOk: false, 822 }, 823 } 824 for _, tt := range tests { 825 t.Run(tt.name, func(t *testing.T) { 826 // setup 827 indexDir := t.TempDir() 828 829 optFns := []func(o *Options){ 830 // ctags aren't important for this test, and the equality checks 831 // for diffing repositories can break due to local configuration 832 func(o *Options) { 833 o.DisableCTags = true 834 }, 835 } 836 837 for _, r := range tt.normalShardRepositories { 838 createTestShard(t, indexDir, r, 1, optFns...) 839 } 840 841 if len(tt.compoundShardRepositories) > 0 { 842 createTestCompoundShard(t, indexDir, tt.compoundShardRepositories, optFns...) 843 } 844 845 o := &Options{ 846 IndexDir: indexDir, 847 RepositoryDescription: *tt.input, 848 } 849 o.SetDefaults() 850 851 // run test 852 got, _, gotOk, err := o.FindRepositoryMetadata() 853 if err != nil { 854 t.Errorf("received unexpected error: %v", err) 855 return 856 } 857 858 // check outcome 859 compareOptions := []cmp.Option{ 860 cmpopts.IgnoreUnexported(zoekt.Repository{}), 861 cmpopts.IgnoreFields(zoekt.Repository{}, "IndexOptions"), 862 cmpopts.EquateEmpty(), 863 } 864 865 if diff := cmp.Diff(tt.expectedRepository, got, compareOptions...); diff != "" { 866 t.Errorf("unexpected difference in repositories (-want +got):\n%s", diff) 867 } 868 869 if tt.expectedOk != gotOk { 870 t.Errorf("unexpected difference in 'ok' value: wanted %t, got %t", tt.expectedOk, gotOk) 871 } 872 }) 873 } 874} 875 876func TestIsLowPriority(t *testing.T) { 877 cases := []string{ 878 "builder_test.go", 879 "test/TestQuery.java", 880 "search/vendor/thirdparty.cc", 881 "search/node_modules/search/js", 882 "search.min.js", 883 "internal/search.js.map", 884 } 885 886 for _, tt := range cases { 887 t.Run(tt, func(t *testing.T) { 888 if !IsLowPriority(tt, nil) { 889 t.Errorf("expected file '%s' to be low priority", tt) 890 } 891 }) 892 } 893 894 negativeCases := []string{ 895 "builder.go", 896 "RoutesTrigger.java", 897 "search.js", 898 } 899 900 for _, tt := range negativeCases { 901 t.Run(tt, func(t *testing.T) { 902 if IsLowPriority(tt, nil) { 903 t.Errorf("did not expect file '%s' to be low priority", tt) 904 } 905 }) 906 } 907 908 // Explicitly check that content is important by using the same filename but 909 // different content. 910 normal := "package mock\n\nvar Mock struct {}" 911 generated := "// Code generated by mock\npackage mock\n\nvar Mock struct {}" 912 if IsLowPriority("mock.go", []byte(normal)) { 913 t.Error("expected non-generated content to not be low priority") 914 } 915 if !IsLowPriority("mock.go", []byte(generated)) { 916 t.Error("expected generated content to be low priority") 917 } 918} 919 920func createTestShard(t testing.TB, indexDir string, r zoekt.Repository, numShards int, optFns ...func(options *Options)) []string { 921 t.Helper() 922 923 if err := os.MkdirAll(filepath.Dir(indexDir), 0o700); err != nil { 924 t.Fatal(err) 925 } 926 927 o := Options{ 928 IndexDir: indexDir, 929 RepositoryDescription: r, 930 ShardMax: 75, // create a new shard every 75 bytes 931 } 932 o.SetDefaults() 933 934 for _, fn := range optFns { 935 fn(&o) 936 } 937 938 b, err := NewBuilder(o) 939 if err != nil { 940 t.Fatalf("NewBuilder: %v", err) 941 } 942 943 if numShards == 0 { 944 // We have to make at least 1 shard. 945 numShards = 1 946 } 947 948 for i := 0; i < numShards; i++ { 949 // Create entries (file + contents) that are ~100 bytes each. 950 // This (along with our shardMax setting of 75 bytes) means that each shard 951 // will contain at most one of these. 952 fileName := strconv.Itoa(i) 953 document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} 954 for _, branch := range o.RepositoryDescription.Branches { 955 document.Branches = append(document.Branches, branch.Name) 956 } 957 958 err := b.Add(document) 959 if err != nil { 960 t.Fatalf("failed to add file %q to builder: %s", fileName, err) 961 } 962 } 963 964 if err := b.Finish(); err != nil { 965 t.Fatalf("Finish: %v", err) 966 } 967 968 return o.FindAllShards() 969} 970 971func createTestCompoundShard(t testing.TB, indexDir string, repositories []zoekt.Repository, optFns ...func(options *Options)) { 972 t.Helper() 973 974 var shardNames []string 975 976 for _, r := range repositories { 977 // create an isolated scratch space to store normal shards for this repository 978 scratchDir := t.TempDir() 979 980 // create shards that'll be merged later 981 createTestShard(t, scratchDir, r, 1, optFns...) 982 983 // discover file names for all the normal shards we created 984 // note: this only looks in the immediate 'scratchDir' folder and doesn't recurse 985 shards, err := filepath.Glob(filepath.Join(scratchDir, "*.zoekt")) 986 if err != nil { 987 t.Fatalf("while globbing %q to find normal shards: %s", scratchDir, err) 988 } 989 990 shardNames = append(shardNames, shards...) 991 } 992 993 // load the normal shards that we created 994 var files []IndexFile 995 for _, shard := range shardNames { 996 f, err := os.Open(shard) 997 if err != nil { 998 t.Fatalf("opening shard file: %s", err) 999 } 1000 defer f.Close() 1001 1002 indexFile, err := NewIndexFile(f) 1003 if err != nil { 1004 t.Fatalf("creating index file: %s", err) 1005 } 1006 defer indexFile.Close() 1007 1008 files = append(files, indexFile) 1009 } 1010 1011 // merge all the simple shards into a compound shard 1012 tmpName, dstName, err := Merge(indexDir, files...) 1013 if err != nil { 1014 t.Fatalf("merging index files into compound shard: %s", err) 1015 } 1016 if err := os.Rename(tmpName, dstName); err != nil { 1017 t.Fatal(err) 1018 } 1019} 1020 1021func TestIgnoreSizeMax(t *testing.T) { 1022 for _, test := range []struct { 1023 name string 1024 largeFiles []string 1025 filePaths []string 1026 expected bool 1027 }{ 1028 { 1029 name: "empty pattern does nothing", 1030 largeFiles: []string{""}, 1031 filePaths: []string{"F0"}, 1032 expected: false, 1033 }, 1034 { 1035 name: "positive match allows", 1036 largeFiles: []string{"F0"}, 1037 filePaths: []string{"F0"}, 1038 expected: true, 1039 }, 1040 { 1041 name: "positive and negative patterns allows", 1042 largeFiles: []string{"F?", "!F0"}, 1043 filePaths: []string{"F1"}, 1044 expected: true, 1045 }, 1046 { 1047 name: "positive and negative patterns disallows", 1048 largeFiles: []string{"F?", "!F0"}, 1049 filePaths: []string{"F0"}, 1050 expected: false, 1051 }, 1052 { 1053 name: "positive escaped pattern allows", 1054 largeFiles: []string{"\\!F?"}, 1055 filePaths: []string{"!F0", "!F1"}, 1056 expected: true, 1057 }, 1058 { 1059 name: "postive escaped pattern does not disallow", 1060 largeFiles: []string{"F0", "\\!F?"}, 1061 filePaths: []string{"F0", "!F0"}, 1062 expected: true, 1063 }, 1064 { 1065 name: "combined meta and literal interpretation disallows", 1066 largeFiles: []string{"*F*", "!!F*"}, 1067 filePaths: []string{"!F0"}, 1068 expected: false, 1069 }, 1070 { 1071 name: "combined meta and literal interpretation allows", 1072 largeFiles: []string{"*F*", "!!F*"}, 1073 filePaths: []string{"F0"}, 1074 expected: true, 1075 }, 1076 { 1077 name: "largeFiles order: positive match overrides previous negative match and allows", 1078 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1079 filePaths: []string{"F0"}, 1080 expected: true, 1081 }, 1082 { 1083 name: "largeFiles order: positive match overrides previous negative match and disallows", 1084 largeFiles: []string{"F?", "!F0", "!F1", "F0"}, 1085 filePaths: []string{"F1"}, 1086 expected: false, 1087 }, 1088 { 1089 name: "largeFiles order: negative match overrides previous positive match and allows", 1090 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1091 filePaths: []string{"F1"}, 1092 expected: true, 1093 }, 1094 { 1095 name: "largeFiles order: negative match overrides previous positive match and disallows", 1096 largeFiles: []string{"F?", "!?0", "F0", "!F0"}, 1097 filePaths: []string{"F0"}, 1098 expected: false, 1099 }, 1100 } { 1101 t.Run(test.name, func(t *testing.T) { 1102 o := Options{ 1103 LargeFiles: test.largeFiles, 1104 } 1105 1106 for _, filePath := range test.filePaths { 1107 ignore := o.IgnoreSizeMax(filePath) 1108 if ignore != test.expected { 1109 t.Errorf("IgnoreSizeMax() for filepath %v returned unexpected result %v", filePath, ignore) 1110 } 1111 } 1112 }) 1113 } 1114} 1115 1116type filerankCase struct { 1117 name string 1118 docs []*Document 1119 want []int 1120} 1121 1122func testFileRankAspect(t *testing.T, c filerankCase) { 1123 var want []*Document 1124 for _, j := range c.want { 1125 want = append(want, c.docs[j]) 1126 } 1127 1128 got := make([]*Document, len(c.docs)) 1129 copy(got, c.docs) 1130 sortDocuments(got) 1131 1132 print := func(ds []*Document) string { 1133 r := "" 1134 for _, d := range ds { 1135 r += fmt.Sprintf("%v, ", d) 1136 } 1137 return r 1138 } 1139 if !reflect.DeepEqual(got, want) { 1140 t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 1141 } 1142} 1143 1144func TestFileRank(t *testing.T) { 1145 for _, c := range []filerankCase{{ 1146 name: "filename", 1147 docs: []*Document{ 1148 { 1149 Name: "longlonglong", 1150 Content: []byte("bla"), 1151 }, 1152 { 1153 Name: "short", 1154 Content: []byte("bla"), 1155 }, 1156 }, 1157 want: []int{1, 0}, 1158 }, { 1159 name: "test", 1160 docs: []*Document{ 1161 { 1162 Name: "foo_test.go", 1163 Content: []byte("bla"), 1164 }, 1165 { 1166 Name: "longlonglong", 1167 Content: []byte("bla"), 1168 }, 1169 }, 1170 want: []int{1, 0}, 1171 }, { 1172 name: "content", 1173 docs: []*Document{ 1174 { 1175 Content: []byte("bla"), 1176 }, 1177 { 1178 Content: []byte("blablablabla"), 1179 }, 1180 { 1181 Content: []byte("blabla"), 1182 }, 1183 }, 1184 want: []int{0, 2, 1}, 1185 }, { 1186 name: "skipped docs", 1187 docs: []*Document{ 1188 { 1189 Name: "binary_file", 1190 SkipReason: "binary file", 1191 }, 1192 { 1193 Name: "some_test.go", 1194 Content: []byte("bla"), 1195 }, 1196 { 1197 Name: "large_file.go", 1198 SkipReason: "too large", 1199 }, 1200 { 1201 Name: "file.go", 1202 Content: []byte("blabla"), 1203 }, 1204 }, 1205 want: []int{3, 1, 0, 2}, 1206 }} { 1207 t.Run(c.name, func(t *testing.T) { 1208 testFileRankAspect(t, c) 1209 }) 1210 } 1211}